WebKitTools/Scripts/webkitpy/layout_tests/run_webkit_tests.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """Run layout tests.
32
33 This is a port of the existing webkit test script run-webkit-tests.
34
35 The TestRunner class runs a series of tests (TestType interface) against a set
36 of test files.  If a test file fails a TestType, it returns a list of TestFailure
37 objects to the TestRunner.  The TestRunner then aggregates the TestFailures to
38 create a final report.
39
40 This script reads several files, if they exist in the test_lists subdirectory
41 next to this script itself.  Each should contain a list of paths to individual
42 tests or entire subdirectories of tests, relative to the outermost test
43 directory.  Entire lines starting with '//' (comments) will be ignored.
44
45 For details of the files' contents and purposes, see test_lists/README.
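
For example (the paths below are illustrative only, not actual test names),
a test list file might contain:

  // Lines starting with '//' are comments and are ignored.
  fast/js
  http/tests/xmlhttprequest/basic-auth.html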
46 """
47
48 from __future__ import with_statement
49
50 import codecs
51 import errno
52 import glob
53 import logging
54 import math
55 import optparse
56 import os
57 import platform
58 import Queue
59 import random
60 import re
61 import shutil
62 import signal
63 import sys
64 import time
65 import traceback
66
67 from layout_package import dump_render_tree_thread
68 from layout_package import json_layout_results_generator
69 from layout_package import message_broker
70 from layout_package import printing
71 from layout_package import test_expectations
72 from layout_package import test_failures
73 from layout_package import test_results
74 from layout_package import test_results_uploader
75 from test_types import image_diff
76 from test_types import text_diff
77 from test_types import test_type_base
78
79 from webkitpy.common.system import user
80 from webkitpy.thirdparty import simplejson
81
82 import port
83
84 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
85
86 # Builder base URL where we have the archived test results.
87 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
88
89 LAYOUT_TESTS_DIRECTORY = "LayoutTests" + os.sep
90
91 TestExpectationsFile = test_expectations.TestExpectationsFile
92
93
94 class TestInput:
95     """Groups information about a test for easy passing of data."""
96
97     def __init__(self, filename, timeout):
98         """Holds the input parameters for a test.
99         Args:
100           filename: Full path to the test.
101           timeout: Timeout in msecs the driver should use while running the test.
102         """
103         # FIXME: filename should really be test_name as a relative path.
104         self.filename = filename
105         self.timeout = timeout
106
107
108 class ResultSummary(object):
109     """A class for partitioning the test results we get into buckets.
110
111     This class is basically a glorified struct and it's private to this file
112     so we don't bother with any information hiding."""
113
114     def __init__(self, expectations, test_files):
115         self.total = len(test_files)
116         self.remaining = self.total
117         self.expectations = expectations
118         self.expected = 0
119         self.unexpected = 0
120         self.tests_by_expectation = {}
121         self.tests_by_timeline = {}
122         self.results = {}
123         self.unexpected_results = {}
124         self.failures = {}
125         self.tests_by_expectation[test_expectations.SKIP] = set()
126         for expectation in TestExpectationsFile.EXPECTATIONS.values():
127             self.tests_by_expectation[expectation] = set()
128         for timeline in TestExpectationsFile.TIMELINES.values():
129             self.tests_by_timeline[timeline] = (
130                 expectations.get_tests_with_timeline(timeline))
131
132     def add(self, result, expected):
133         """Add a TestResult into the appropriate bin.
134
135         Args:
136           result: TestResult from dump_render_tree_thread.
137           expected: whether the result was what we expected it to be.
138         """
139
140         self.tests_by_expectation[result.type].add(result.filename)
141         self.results[result.filename] = result
142         self.remaining -= 1
143         if len(result.failures):
144             self.failures[result.filename] = result.failures
145         if expected:
146             self.expected += 1
147         else:
148             self.unexpected_results[result.filename] = result.type
149             self.unexpected += 1
150
151
152 def summarize_unexpected_results(port_obj, expectations, result_summary,
153                                  retry_summary):
154     """Summarize any unexpected results as a dict.
155
156     FIXME: split this data structure into a separate class?
157
158     Args:
159         port_obj: interface to port-specific hooks
160         expectations: test_expectations.TestExpectations object
161         result_summary: summary object from initial test runs
162         retry_summary: summary object from final test run of retried tests
163     Returns:
164         A dictionary containing a summary of the unexpected results from the
165         run, with the following fields:
166         'version': a version indicator (1 in this version)
167         'fixable': # of fixable tests (NOW - PASS)
168         'skipped': # of skipped tests (NOW & SKIPPED)
169         'num_regressions': # of non-flaky failures
170         'num_flaky': # of flaky failures
171         'num_passes': # of unexpected passes
172         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
173     """
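    # For illustration only (the test names and counts below are hypothetical),
    # the returned dict has roughly this shape:
    #   {'version': 1, 'fixable': 2, 'skipped': 0, 'num_regressions': 1,
    #    'num_flaky': 1, 'num_passes': 0,
    #    'tests': {'fast/js/foo.html': {'expected': 'PASS',
    #                                   'actual': 'TEXT PASS'}}}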
174     results = {}
175     results['version'] = 1
176
177     tbe = result_summary.tests_by_expectation
178     tbt = result_summary.tests_by_timeline
179     results['fixable'] = len(tbt[test_expectations.NOW] -
180                                 tbe[test_expectations.PASS])
181     results['skipped'] = len(tbt[test_expectations.NOW] &
182                                 tbe[test_expectations.SKIP])
183
184     num_passes = 0
185     num_flaky = 0
186     num_regressions = 0
187     keywords = {}
188     for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
189         keywords[v] = k.upper()
190
191     tests = {}
192     for filename, result in result_summary.unexpected_results.iteritems():
193         # Note that if a test crashed in the original run, we ignore
194         # whether or not it crashed when we retried it (if we retried it),
195         # and always consider the result not flaky.
196         test = port_obj.relative_test_filename(filename)
197         expected = expectations.get_expectations_string(filename)
198         actual = [keywords[result]]
199
200         if result == test_expectations.PASS:
201             num_passes += 1
202         elif result == test_expectations.CRASH:
203             num_regressions += 1
204         else:
205             if filename not in retry_summary.unexpected_results:
206                 actual.extend(expectations.get_expectations_string(
207                     filename).split(" "))
208                 num_flaky += 1
209             else:
210                 retry_result = retry_summary.unexpected_results[filename]
211                 if result != retry_result:
212                     actual.append(keywords[retry_result])
213                     num_flaky += 1
214                 else:
215                     num_regressions += 1
216
217         tests[test] = {}
218         tests[test]['expected'] = expected
219         tests[test]['actual'] = " ".join(actual)
220
221     results['tests'] = tests
222     results['num_passes'] = num_passes
223     results['num_flaky'] = num_flaky
224     results['num_regressions'] = num_regressions
225
226     return results
227
228
229 class TestRunner:
230     """A class for managing the running of a series of tests on a set of
231     layout test files."""
232
233     HTTP_SUBDIR = os.sep.join(['', 'http', ''])
234     WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])
235
236     # The per-test timeout in milliseconds, if no --time-out-ms option was
237     # given to run_webkit_tests. This should correspond to the default timeout
238     # in DumpRenderTree.
239     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
240
241     def __init__(self, port, options, printer):
242         """Initialize test runner data structures.
243
244         Args:
245           port: an object implementing port-specific hooks
246           options: a dictionary of command line options
247           printer: a Printer object to record updates to.
248         """
249         self._port = port
250         self._options = options
251         self._printer = printer
252
253         # disable wss server. need to install pyOpenSSL on buildbots.
254         # self._websocket_secure_server = websocket_server.PyWebSocket(
255         #        options.results_directory, use_tls=True, port=9323)
256
257         # a list of TestType objects
258         self._test_types = [text_diff.TestTextDiff]
259         if options.pixel_tests:
260             self._test_types.append(image_diff.ImageDiff)
261
262         # a set of test files, and the same tests as a list
263         self._test_files = set()
264         self._test_files_list = None
265         self._result_queue = Queue.Queue()
266         self._retrying = False
267
268     def collect_tests(self, args, last_unexpected_results):
269         """Find all the files to test.
270
271         Args:
272           args: list of test arguments from the command line
273           last_unexpected_results: list of unexpected results to retest, if any
274
275         """
276         paths = [self._strip_test_dir_prefix(arg) for arg in args if arg]
277         paths += last_unexpected_results
278         if self._options.test_list:
279             paths += read_test_files(self._options.test_list)
280         self._test_files = self._port.tests(paths)
281
282     def _strip_test_dir_prefix(self, path):
283         if path.startswith(LAYOUT_TESTS_DIRECTORY):
284             return path[len(LAYOUT_TESTS_DIRECTORY):]
285         return path
286
287     def lint(self):
288         # Creating the expectations for each platform/configuration pair
289         # does all the test list parsing and ensures the syntax is correct
290         # (e.g., no dupes).
291         for platform_name in self._port.test_platform_names():
292             self.parse_expectations(platform_name, is_debug_mode=True)
293             self.parse_expectations(platform_name, is_debug_mode=False)
294         self._printer.write("")
295         _log.info("If there are no fail messages, errors or exceptions, "
296                   "then the lint succeeded.")
297         return 0
298
299     def parse_expectations(self, test_platform_name, is_debug_mode):
300         """Parse the expectations from the test_list files and return a data
301         structure holding them. Throws an error if the test_list files have
302         invalid syntax."""
303         if self._options.lint_test_files:
304             test_files = None
305         else:
306             test_files = self._test_files
307
308         try:
309             expectations_str = self._port.test_expectations()
310             overrides_str = self._port.test_expectations_overrides()
311             self._expectations = test_expectations.TestExpectations(
312                 self._port, test_files, expectations_str, test_platform_name,
313                 is_debug_mode, self._options.lint_test_files,
314                 overrides=overrides_str)
315             return self._expectations
316         except SyntaxError, err:
317             if self._options.lint_test_files:
318                 print str(err)
319             else:
320                 raise err
321
322     def prepare_lists_and_print_output(self):
323         """Creates appropriate subsets of the test lists and returns a
324         ResultSummary object. Also prints expected test counts.
325         """
326
327         # Remove skipped - both fixable and ignored - files from the
328         # top-level list of files to test.
329         num_all_test_files = len(self._test_files)
330         self._printer.print_expected("Found:  %d tests" %
331                                      (len(self._test_files)))
332         if not num_all_test_files:
333             _log.critical('No tests to run.')
334             return None
335
336         skipped = set()
337         if num_all_test_files > 1 and not self._options.force:
338             skipped = self._expectations.get_tests_with_result_type(
339                            test_expectations.SKIP)
340             self._test_files -= skipped
341
342         # Create a sorted list of test files so the subset chunk,
343         # if used, contains alphabetically consecutive tests.
344         self._test_files_list = list(self._test_files)
345         if self._options.randomize_order:
346             random.shuffle(self._test_files_list)
347         else:
348             self._test_files_list.sort()
349
350         # If the user specifies they just want to run a subset of the tests,
351         # just grab a subset of the non-skipped tests.
352         if self._options.run_chunk or self._options.run_part:
353             chunk_value = self._options.run_chunk or self._options.run_part
354             test_files = self._test_files_list
355             try:
356                 (chunk_num, chunk_len) = chunk_value.split(":")
357                 chunk_num = int(chunk_num)
358                 assert(chunk_num >= 0)
359                 test_size = int(chunk_len)
360                 assert(test_size > 0)
361             except:
362                 _log.critical("invalid chunk '%s'" % chunk_value)
363                 return None
364
365             # Get the number of tests
366             num_tests = len(test_files)
367
368             # Get the start offset of the slice.
369             if self._options.run_chunk:
370                 chunk_len = test_size
371                 # In this case chunk_num can be really large. We need
372                 # to wrap it around the current number of tests.
373                 slice_start = (chunk_num * chunk_len) % num_tests
374             else:
375                 # Validate the data.
376                 assert(test_size <= num_tests)
377                 assert(chunk_num <= test_size)
378
379                 # To compute chunk_len without skipping any tests, we
380                 # round num_tests up to the next multiple of test_size
381                 # so that every part fits exactly.
382                 rounded_tests = num_tests
383                 if rounded_tests % test_size != 0:
384                     rounded_tests = (num_tests + test_size -
385                                      (num_tests % test_size))
386
387                 chunk_len = rounded_tests / test_size
388                 slice_start = chunk_len * (chunk_num - 1)
389                 # It is fine if this goes past the end of the test list.
390
391             # Get the end offset of the slice.
392             slice_end = min(num_tests, slice_start + chunk_len)
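            # Worked example (hypothetical numbers): with --run-part=2:4 and
            # 10 tests, rounded_tests = 12, chunk_len = 3, slice_start = 3,
            # and slice_end = min(10, 3 + 3) = 6, so test_files[3:6] is run.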
393
394             files = test_files[slice_start:slice_end]
395
396             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
397                 (slice_end - slice_start), slice_start, slice_end, num_tests)
398             self._printer.print_expected(tests_run_msg)
399
400             # If we reached the end and we don't have enough tests, we run some
401             # from the beginning.
402             if slice_end - slice_start < chunk_len:
403                 extra = chunk_len - (slice_end - slice_start)
404                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
405                             extra)
406                 self._printer.print_expected(extra_msg)
407                 tests_run_msg += "\n" + extra_msg
408                 files.extend(test_files[0:extra])
409             tests_run_filename = os.path.join(self._options.results_directory,
410                                               "tests_run.txt")
411             with codecs.open(tests_run_filename, "w", "utf-8") as file:
412                 file.write(tests_run_msg + "\n")
413
414             len_skip_chunk = int(len(files) * len(skipped) /
415                                  float(len(self._test_files)))
416             skip_chunk_list = list(skipped)[0:len_skip_chunk]
417             skip_chunk = set(skip_chunk_list)
418
419             # Update expectations so that the stats are calculated correctly.
420             # We need to pass a list that includes the right # of skipped files
421             # to parse_expectations() so that ResultSummary() will get the correct
422             # stats. So, we add in the subset of skipped files, and then
423             # subtract them back out.
424             self._test_files_list = files + skip_chunk_list
425             self._test_files = set(self._test_files_list)
426
427             self._expectations = self.parse_expectations(
428                 self._port.test_platform_name(),
429                 self._options.configuration == 'Debug')
430
431             self._test_files = set(files)
432             self._test_files_list = files
433         else:
434             skip_chunk = skipped
435
436         result_summary = ResultSummary(self._expectations,
437             self._test_files | skip_chunk)
438         self._print_expected_results_of_type(result_summary,
439             test_expectations.PASS, "passes")
440         self._print_expected_results_of_type(result_summary,
441             test_expectations.FAIL, "failures")
442         self._print_expected_results_of_type(result_summary,
443             test_expectations.FLAKY, "flaky")
444         self._print_expected_results_of_type(result_summary,
445             test_expectations.SKIP, "skipped")
446
447         if self._options.force:
448             self._printer.print_expected('Running all tests, including '
449                                          'skips (--force)')
450         else:
451             # Note that we don't actually run the skipped tests (they were
452             # subtracted out of self._test_files, above), but we stub out the
453             # results here so the statistics can remain accurate.
454             for test in skip_chunk:
455                 result = test_results.TestResult(test,
456                     failures=[], test_run_time=0, total_time_for_all_diffs=0,
457                     time_for_diffs=0)
458                 result.type = test_expectations.SKIP
459                 result_summary.add(result, expected=True)
460         self._printer.print_expected('')
461
462         return result_summary
463
464     def _get_dir_for_test_file(self, test_file):
465         """Returns the directory by which to shard the given
466         test file."""
467         index = test_file.rfind(os.sep + LAYOUT_TESTS_DIRECTORY)
468
469         test_file = test_file[index + len(LAYOUT_TESTS_DIRECTORY):]
470         test_file_parts = test_file.split(os.sep, 1)
471         directory = test_file_parts[0]
472         test_file = test_file_parts[1]
473
474         # The http tests are very stable on mac/linux.
475         # TODO(ojan): Make the http server on Windows be apache so we can
476         # shard the http tests there as well. Switching to apache is
477         # what made them stable on linux/mac.
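        # Illustrative example (hypothetical path): on mac/linux a test at
        # '.../LayoutTests/http/tests/xmlhttprequest/foo.html' shards as
        # 'http/tests/xmlhttprequest'; on other platforms all http tests
        # share the single 'http' shard.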
478         return_value = directory
479         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
480                 and test_file.find(os.sep) >= 0):
481             test_file_parts = test_file.split(os.sep, 1)
482             directory = test_file_parts[0]
483             return_value = os.path.join(return_value, directory)
484             test_file = test_file_parts[1]
485
486         return return_value
487
488     def _get_test_input_for_file(self, test_file):
489         """Returns the appropriate TestInput object for the file. Mostly this
490         is used for looking up the timeout value (in ms) to use for the given
491         test."""
492         if self._expectations.has_modifier(test_file, test_expectations.SLOW):
493             return TestInput(test_file, self._options.slow_time_out_ms)
494         return TestInput(test_file, self._options.time_out_ms)
495
496     def _test_requires_lock(self, test_file):
497         """Return True if the test needs to be locked when
498         running multiple copies of NRWTs."""
499         split_path = test_file.split(os.sep)
500         return 'http' in split_path or 'websocket' in split_path
501
502     def _get_test_file_queue(self, test_files):
503         """Create the thread-safe queue of (directory, list of TestInput)
504         tuples. Each TestShellThread pulls a list from this queue and runs
505         those tests in order before grabbing the next available list.
506
507         Shard the lists by directory. This helps ensure that tests that depend
508         on each other (aka bad tests!) continue to run together as most
509         cross-test dependencies tend to occur within the same directory.
510
511         Return:
512           The Queue of lists of TestInput objects.
513         """
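        # A queue item is a (directory, [TestInput, ...]) tuple; for example
        # (hypothetical): ('fast/js', [TestInput('.../fast/js/a.html', 6000)]).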
514
515         test_lists = []
516         tests_to_http_lock = []
517         if (self._options.experimental_fully_parallel or
518             self._is_single_threaded()):
519             for test_file in test_files:
520                 test_input = self._get_test_input_for_file(test_file)
521                 if self._test_requires_lock(test_file):
522                     tests_to_http_lock.append(test_input)
523                 else:
524                     test_lists.append((".", [test_input]))
525         else:
526             tests_by_dir = {}
527             for test_file in test_files:
528                 directory = self._get_dir_for_test_file(test_file)
529                 test_input = self._get_test_input_for_file(test_file)
530                 if self._test_requires_lock(test_file):
531                     tests_to_http_lock.append(test_input)
532                 else:
533                     tests_by_dir.setdefault(directory, [])
534                     tests_by_dir[directory].append(test_input)
535             # Sort by the number of tests in the dir so that the ones with the
536             # most tests get run first in order to maximize parallelization.
537             # Number of tests is a good enough, but not perfect, approximation
538             # of how long that set of tests will take to run. We can't just use
539             # a PriorityQueue until we move to Python 2.6.
540             for directory in tests_by_dir:
541                 test_list = tests_by_dir[directory]
542                 # Keep the tests in alphabetical order.
543                 # FIXME: Remove once tests are fixed so they can be run in any
544                 # order.
545                 test_list.reverse()
546                 test_list_tuple = (directory, test_list)
547                 test_lists.append(test_list_tuple)
548             test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
549
550         # Put the http tests first. There are only a couple hundred of them,
551         # but each http test takes a very long time to run, so sorting by the
552         # number of tests doesn't accurately capture how long they take to run.
553         if tests_to_http_lock:
554             tests_to_http_lock.reverse()
555             test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
556
557         filename_queue = Queue.Queue()
558         for item in test_lists:
559             filename_queue.put(item)
560         return filename_queue
561
562     def _get_test_args(self, index):
563         """Returns the TestArguments for the thread with the given index."""
564         test_args = test_type_base.TestArguments()
565         test_args.png_path = None
566         if self._options.pixel_tests:
567             png_path = os.path.join(self._options.results_directory,
568                                     "png_result%s.png" % index)
569             test_args.png_path = png_path
570         test_args.new_baseline = self._options.new_baseline
571         test_args.reset_results = self._options.reset_results
572
573         return test_args
574
575     def _contains_tests(self, subdir):
576         for test_file in self._test_files:
577             if test_file.find(subdir) >= 0:
578                 return True
579         return False
580
581     def _instantiate_dump_render_tree_threads(self, test_files,
582                                               result_summary):
583         """Instantiates and starts the TestShellThread(s).
584
585         Return:
586           The list of threads.
587         """
588         filename_queue = self._get_test_file_queue(test_files)
589
590         # Instantiate TestShellThreads and start them.
591         threads = []
592         for i in xrange(int(self._options.child_processes)):
593             # Create separate TestTypes instances for each thread.
594             test_types = []
595             for test_type in self._test_types:
596                 test_types.append(test_type(self._port,
597                                     self._options.results_directory))
598
599             test_args = self._get_test_args(i)
600             thread = dump_render_tree_thread.TestShellThread(self._port,
601                 self._options, filename_queue, self._result_queue,
602                 test_types, test_args)
603             if self._is_single_threaded():
604                 thread.run_in_main_thread(self, result_summary)
605             else:
606                 thread.start()
607             threads.append(thread)
608
609         return threads
610
611     def _is_single_threaded(self):
612         """Returns whether we should run all the tests in the main thread."""
613         return int(self._options.child_processes) == 1
614
615     def _run_tests(self, file_list, result_summary):
616         """Runs the tests in the file_list.
617
618         Return: A tuple (keyboard_interrupted, thread_timings, test_timings,
619             individual_test_timings)
620             keyboard_interrupted is whether someone pressed Ctrl-C
621             thread_timings is a list of dicts with the total runtime
622               of each thread with 'name', 'num_tests', 'total_time' properties
623             test_timings is a list of timings for each sharded subdirectory
624               of the form [time, directory_name, num_tests]
625             individual_test_timings is a list of run times for each test
626               in the form {filename:filename, test_run_time:test_run_time}
627             result_summary: summary object to populate with the results
628         """
629         # FIXME: We should use webkitpy.tool.grammar.pluralize here.
630         plural = ""
631         if not self._is_single_threaded():
632             plural = "s"
633         self._printer.print_update('Starting %s%s ...' %
634                                    (self._port.driver_name(), plural))
635         threads = self._instantiate_dump_render_tree_threads(file_list,
636                                                              result_summary)
637         self._printer.print_update("Starting testing ...")
638
639         keyboard_interrupted = self._wait_for_threads_to_finish(threads,
640                                                                 result_summary)
641         (thread_timings, test_timings, individual_test_timings) = \
642             self._collect_timing_info(threads)
643
644         return (keyboard_interrupted, thread_timings, test_timings,
645                 individual_test_timings)
646
647     def _wait_for_threads_to_finish(self, threads, result_summary):
648         keyboard_interrupted = False
649         try:
650             # Loop through all the threads waiting for them to finish.
651             some_thread_is_alive = True
652             while some_thread_is_alive:
653                 some_thread_is_alive = False
654                 t = time.time()
655                 for thread in threads:
656                     exception_info = thread.exception_info()
657                     if exception_info is not None:
658                         # Re-raise the thread's exception here to make it
659                         # clear that testing was aborted. Otherwise,
660                         # the tests that did not run would be assumed
661                         # to have passed.
662                         raise exception_info[0], exception_info[1], exception_info[2]
663
664                     if thread.isAlive():
665                         some_thread_is_alive = True
666                         next_timeout = thread.next_timeout()
667                         if (next_timeout and t > next_timeout):
668                             message_broker.log_wedged_thread(thread.id())
669                             thread.clear_next_timeout()
670
671                 self.update_summary(result_summary)
672
673                 if some_thread_is_alive:
674                     time.sleep(0.01)
675
676         except KeyboardInterrupt:
677             keyboard_interrupted = True
678             for thread in threads:
679                 thread.cancel()
680
681         return keyboard_interrupted
682
683     def _collect_timing_info(self, threads):
684         test_timings = {}
685         individual_test_timings = []
686         thread_timings = []
687
688         for thread in threads:
689             thread_timings.append({'name': thread.getName(),
690                                    'num_tests': thread.get_num_tests(),
691                                    'total_time': thread.get_total_time()})
692             test_timings.update(thread.get_test_group_timing_stats())
693             individual_test_timings.extend(thread.get_test_results())
694
695         return (thread_timings, test_timings, individual_test_timings)
696
697     def needs_http(self):
698         """Returns whether the test runner needs an HTTP server."""
699         return self._contains_tests(self.HTTP_SUBDIR)
700
701     def needs_websocket(self):
702         """Returns whether the test runner needs a WebSocket server."""
703         return self._contains_tests(self.WEBSOCKET_SUBDIR)
704
705     def set_up_run(self):
706         """Configures the system to be ready to run tests.
707
708         Returns a ResultSummary object if we should continue to run tests,
709         or None if we should abort.
710
711         """
712         # This must be started before we check the system dependencies,
713         # since the helper may do things to make the setup correct.
714         self._printer.print_update("Starting helper ...")
715         self._port.start_helper()
716
717         # Check that the system dependencies (themes, fonts, ...) are correct.
718         if not self._options.nocheck_sys_deps:
719             self._printer.print_update("Checking system dependencies ...")
720             if not self._port.check_sys_deps(self.needs_http()):
721                 self._port.stop_helper()
722                 return None
723
724         if self._options.clobber_old_results:
725             self._clobber_old_results()
726
727         # Create the output directory if it doesn't already exist.
728         self._port.maybe_make_directory(self._options.results_directory)
729
730         self._port.setup_test_run()
731
732         self._printer.print_update("Preparing tests ...")
733         result_summary = self.prepare_lists_and_print_output()
734         if not result_summary:
735             return None
736
737         return result_summary
738
739     def run(self, result_summary):
740         """Run all our tests on all our test files.
741
742         For each test file, we run each test type. If there are any failures,
743         we collect them for reporting.
744
745         Args:
746           result_summary: a summary object tracking the test results.
747
748         Return:
749           The number of unexpected results (0 == success)
750         """
751         # collect_tests() must have been called first to initialize us.
752         # If we didn't find any files to test, we've errored out already in
753         # prepare_lists_and_print_output().
754         assert(len(self._test_files))
755
756         start_time = time.time()
757
758         keyboard_interrupted, thread_timings, test_timings, \
759             individual_test_timings = (
760             self._run_tests(self._test_files_list, result_summary))
761
762         # We exclude the crashes from the list of results to retry, because
763         # we want to treat even a potentially flaky crash as an error.
764         failures = self._get_failures(result_summary, include_crashes=False)
765         retry_summary = result_summary
766         while (len(failures) and self._options.retry_failures and
767             not self._retrying and not keyboard_interrupted):
768             _log.info('')
769             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
770             _log.info('')
771             self._retrying = True
772             retry_summary = ResultSummary(self._expectations, failures.keys())
773             # Note that we intentionally ignore the return value here.
774             self._run_tests(failures.keys(), retry_summary)
775             failures = self._get_failures(retry_summary, include_crashes=True)
776
777         end_time = time.time()
778
779         self._print_timing_statistics(end_time - start_time,
780                                       thread_timings, test_timings,
781                                       individual_test_timings,
782                                       result_summary)
783
784         self._print_result_summary(result_summary)
785
786         sys.stdout.flush()
787         sys.stderr.flush()
788
789         self._printer.print_one_line_summary(result_summary.total,
790                                              result_summary.expected,
791                                              result_summary.unexpected)
792
793         unexpected_results = summarize_unexpected_results(self._port,
794             self._expectations, result_summary, retry_summary)
795         self._printer.print_unexpected_results(unexpected_results)
796
797         if self._options.record_results:
798             # Write the same data to log files and upload generated JSON files
799             # to appengine server.
800             self._upload_json_files(unexpected_results, result_summary,
801                                     individual_test_timings)
802
803         # Write the summary to disk (results.html) and display it if requested.
804         wrote_results = self._write_results_html_file(result_summary)
805         if self._options.show_results and wrote_results:
806             self._show_results_html_file()
807
808         # Now that we've completed all the processing we can, we re-raise
809         # a KeyboardInterrupt if necessary so the caller can handle it.
810         if keyboard_interrupted:
811             raise KeyboardInterrupt
812
813         # Ignore flaky failures and unexpected passes so we don't turn the
814         # bot red for those.
815         return unexpected_results['num_regressions']
816
817     def clean_up_run(self):
818         """Restores the system after we're done running tests."""
819
820         _log.debug("flushing stdout")
821         sys.stdout.flush()
822         _log.debug("flushing stderr")
823         sys.stderr.flush()
824         _log.debug("stopping helper")
825         self._port.stop_helper()
826
827     def update_summary(self, result_summary):
828         """Update the summary and print results with any completed tests."""
829         while True:
830             try:
831                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
832             except Queue.Empty:
833                 return
834
835             expected = self._expectations.matches_an_expected_result(
836                 result.filename, result.type, self._options.pixel_tests)
837             result_summary.add(result, expected)
838             exp_str = self._expectations.get_expectations_string(
839                 result.filename)
840             got_str = self._expectations.expectation_to_string(result.type)
841             self._printer.print_test_result(result, expected, exp_str, got_str)
842             self._printer.print_progress(result_summary, self._retrying,
843                                          self._test_files_list)
844
845     def _clobber_old_results(self):
846         # Just clobber the actual test results directories since the other
847         # files in the results directory are explicitly used for cross-run
848         # tracking.
849         self._printer.print_update("Clobbering old results in %s" %
850                                    self._options.results_directory)
851         layout_tests_dir = self._port.layout_tests_dir()
852         possible_dirs = self._port.test_dirs()
853         for dirname in possible_dirs:
854             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
855                 shutil.rmtree(os.path.join(self._options.results_directory,
856                                            dirname),
857                               ignore_errors=True)
858
859     def _get_failures(self, result_summary, include_crashes):
860         """Filters a dict of results and returns only the failures.
861
862         Args:
863           result_summary: the results of the test run
864           include_crashes: whether crashes are included in the output.
865             We use False when finding the list of failures to retry
866             to see if the results were flaky. Although the crashes may also be
867             flaky, we treat them as if they aren't so that they're not ignored.
868         Returns:
869           a dict of files -> results
870         """
871         failed_results = {}
872         for test, result in result_summary.unexpected_results.iteritems():
873             if (result == test_expectations.PASS or
874                 result == test_expectations.CRASH and not include_crashes):
875                 continue
876             failed_results[test] = result
877
878         return failed_results
879
880     def _upload_json_files(self, unexpected_results, result_summary,
881                         individual_test_timings):
882         """Writes the results of the test run as JSON files into the results
883         dir and uploads the files to the appengine server.
884
885         There are three different files written into the results dir:
886           unexpected_results.json: A short list of any unexpected results.
887             This is used by the buildbots to display results.
888           expectations.json: This is used by the flakiness dashboard.
889           results.json: A full list of the results - used by the flakiness
890             dashboard and the aggregate results dashboard.
891
892         Args:
893           unexpected_results: dict of unexpected results
894           result_summary: full summary object
895           individual_test_timings: list of test times (used by the flakiness
896             dashboard).
897         """
898         results_directory = self._options.results_directory
899         _log.debug("Writing JSON files in %s." % results_directory)
900         unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
901         with codecs.open(unexpected_json_path, "w", "utf-8") as file:
902             simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)
903
904         # Write a json file of the test_expectations.txt file for the layout
905         # tests dashboard.
906         expectations_path = os.path.join(results_directory, "expectations.json")
907         expectations_json = \
908             self._expectations.get_expectations_json_for_all_platforms()
909         with codecs.open(expectations_path, "w", "utf-8") as file:
910             file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)
911
912         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
913             self._port, self._options.builder_name, self._options.build_name,
914             self._options.build_number, self._options.results_directory,
915             BUILDER_BASE_URL, individual_test_timings,
916             self._expectations, result_summary, self._test_files_list,
917             not self._options.upload_full_results,
918             self._options.test_results_server,
919             "layout-tests",
920             self._options.master_name)
921
922         _log.debug("Finished writing JSON files.")
923
924         json_files = ["expectations.json"]
925         if self._options.upload_full_results:
926             json_files.append("results.json")
927         else:
928             json_files.append("incremental_results.json")
929
930         generator.upload_json_files(json_files)
931
932     def _print_config(self):
933         """Prints the configuration for the test run."""
934         p = self._printer
935         p.print_config("Using port '%s'" % self._port.name())
936         p.print_config("Placing test results in %s" %
937                        self._options.results_directory)
938         if self._options.new_baseline:
939             p.print_config("Placing new baselines in %s" %
940                            self._port.baseline_path())
941         p.print_config("Using %s build" % self._options.configuration)
942         if self._options.pixel_tests:
943             p.print_config("Pixel tests enabled")
944         else:
945             p.print_config("Pixel tests disabled")
946
947         p.print_config("Regular timeout: %s, slow test timeout: %s" %
948                        (self._options.time_out_ms,
949                         self._options.slow_time_out_ms))
950
951         if self._is_single_threaded():
952             p.print_config("Running one %s" % self._port.driver_name())
953         else:
954             p.print_config("Running %s %ss in parallel" %
955                            (self._options.child_processes,
956                             self._port.driver_name()))
957         p.print_config("")
958
959     def _print_expected_results_of_type(self, result_summary,
960                                         result_type, result_type_str):
961         """Print the number of tests in a given result class.
962
963         Args:
964           result_summary - the object containing all the results to report on
965           result_type - the particular result type to report in the summary.
966           result_type_str - a string description of the result_type.
967         """
968         tests = self._expectations.get_tests_with_result_type(result_type)
969         now = result_summary.tests_by_timeline[test_expectations.NOW]
970         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
971
972         # We use a fancy format string in order to print the data out in a
973         # nicely-aligned table.
974         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
975                   % (self._num_digits(now), self._num_digits(wontfix)))
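        # For example (illustrative values), with 4-digit 'now' and 3-digit
        # 'wontfix' counts this prints a line like:
        #   "Expect:   123 passes   (  98 now,  25 wontfix)"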
976         self._printer.print_expected(fmtstr %
977             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
978
979     def _num_digits(self, num):
980         """Returns the number of digits needed to represent the length of a
981         sequence."""
982         ndigits = 1
983         if len(num):
984             ndigits = int(math.log10(len(num))) + 1
985         return ndigits
986
987     def _print_timing_statistics(self, total_time, thread_timings,
988                                directory_test_timings, individual_test_timings,
989                                result_summary):
990         """Record timing-specific information for the test run.
991
992         Args:
993           total_time: total elapsed time (in seconds) for the test run
994           thread_timings: wall clock time each thread ran for
995           directory_test_timings: timing by directory
996           individual_test_timings: timing by file
997           result_summary: summary object for the test run
998         """
999         self._printer.print_timing("Test timing:")
1000         self._printer.print_timing("  %6.2f total testing time" % total_time)
1001         self._printer.print_timing("")
1002         self._printer.print_timing("Thread timing:")
1003         cuml_time = 0
1004         for t in thread_timings:
1005             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
1006                   (t['name'], t['num_tests'], t['total_time']))
1007             cuml_time += t['total_time']
1008         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
1009               (cuml_time, cuml_time / int(self._options.child_processes)))
1010         self._printer.print_timing("")
1011
1012         self._print_aggregate_test_statistics(individual_test_timings)
1013         self._print_individual_test_times(individual_test_timings,
1014                                           result_summary)
1015         self._print_directory_timings(directory_test_timings)
1016
1017     def _print_aggregate_test_statistics(self, individual_test_timings):
1018         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
1019         Args:
1020           individual_test_timings: List of dump_render_tree_thread.TestStats
1021               for all tests.
1022         """
1023         test_types = []  # Unit tests don't actually produce any timings.
1024         if individual_test_timings:
1025             test_types = individual_test_timings[0].time_for_diffs.keys()
1026         times_for_dump_render_tree = []
1027         times_for_diff_processing = []
1028         times_per_test_type = {}
1029         for test_type in test_types:
1030             times_per_test_type[test_type] = []
1031
1032         for test_stats in individual_test_timings:
1033             times_for_dump_render_tree.append(test_stats.test_run_time)
1034             times_for_diff_processing.append(
1035                 test_stats.total_time_for_all_diffs)
1036             time_for_diffs = test_stats.time_for_diffs
1037             for test_type in test_types:
1038                 times_per_test_type[test_type].append(
1039                     time_for_diffs[test_type])
1040
1041         self._print_statistics_for_test_timings(
1042             "PER TEST TIME IN TESTSHELL (seconds):",
1043             times_for_dump_render_tree)
1044         self._print_statistics_for_test_timings(
1045             "PER TEST DIFF PROCESSING TIMES (seconds):",
1046             times_for_diff_processing)
1047         for test_type in test_types:
1048             self._print_statistics_for_test_timings(
1049                 "PER TEST TIMES BY TEST TYPE: %s" % test_type,
1050                 times_per_test_type[test_type])
1051
1052     def _print_individual_test_times(self, individual_test_timings,
1053                                   result_summary):
1054         """Prints the run times for slow, timeout and crash tests.
1055         Args:
1056           individual_test_timings: List of dump_render_tree_thread.TestStats
1057               for all tests.
1058           result_summary: summary object for test run
1059         """
1060         # Reverse-sort by the time spent in DumpRenderTree.
1061         individual_test_timings.sort(lambda a, b:
1062             cmp(b.test_run_time, a.test_run_time))
1063
1064         num_printed = 0
1065         slow_tests = []
1066         timeout_or_crash_tests = []
1067         unexpected_slow_tests = []
1068         for test_tuple in individual_test_timings:
1069             filename = test_tuple.filename
1070             is_timeout_crash_or_slow = False
1071             if self._expectations.has_modifier(filename,
1072                                                test_expectations.SLOW):
1073                 is_timeout_crash_or_slow = True
1074                 slow_tests.append(test_tuple)
1075
1076             if filename in result_summary.failures:
1077                 result = result_summary.results[filename].type
1078                 if (result == test_expectations.TIMEOUT or
1079                     result == test_expectations.CRASH):
1080                     is_timeout_crash_or_slow = True
1081                     timeout_or_crash_tests.append(test_tuple)
1082
1083             if (not is_timeout_crash_or_slow and
1084                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
1085                 num_printed = num_printed + 1
1086                 unexpected_slow_tests.append(test_tuple)
1087
1088         self._printer.print_timing("")
1089         self._print_test_list_timing("%s slowest tests that are not "
1090             "marked as SLOW and did not timeout/crash:" %
1091             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1092         self._printer.print_timing("")
1093         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1094         self._printer.print_timing("")
1095         self._print_test_list_timing("Tests that timed out or crashed:",
1096                                      timeout_or_crash_tests)
1097         self._printer.print_timing("")
1098
1099     def _print_test_list_timing(self, title, test_list):
1100         """Print timing info for each test.
1101
1102         Args:
1103           title: section heading
1104           test_list: tests that fall in this section
1105         """
1106         if self._printer.disabled('slowest'):
1107             return
1108
1109         self._printer.print_timing(title)
1110         for test_tuple in test_list:
1111             filename = test_tuple.filename[len(
1112                 self._port.layout_tests_dir()) + 1:]
1113             filename = filename.replace('\\', '/')
1114             test_run_time = round(test_tuple.test_run_time, 1)
1115             self._printer.print_timing("  %s took %s seconds" %
1116                                        (filename, test_run_time))
1117
1118     def _print_directory_timings(self, directory_test_timings):
1119         """Print timing info by directory for any directories that
1120         take > 10 seconds to run.
1121
1122         Args:
1123           directory_test_timings: time info for each directory
1124         """
1125         timings = []
1126         for directory in directory_test_timings:
1127             num_tests, time_for_directory = directory_test_timings[directory]
1128             timings.append((round(time_for_directory, 1), directory,
1129                             num_tests))
1130         timings.sort()
1131
1132         self._printer.print_timing("Time to process slowest subdirectories:")
1133         min_seconds_to_print = 10
1134         for timing in timings:
1135             if timing[0] > min_seconds_to_print:
1136                 self._printer.print_timing(
1137                     "  %s took %s seconds to run %s tests." % (timing[1],
1138                     timing[0], timing[2]))
1139         self._printer.print_timing("")
1140
1141     def _print_statistics_for_test_timings(self, title, timings):
1142         """Prints the median, mean and standard deviation of the values in
1143         timings.
1144
1145         Args:
1146           title: Title for these timings.
1147           timings: A list of floats representing times.
1148         """
1149         self._printer.print_timing(title)
1150         timings.sort()
1151
1152         num_tests = len(timings)
1153         if not num_tests:
1154             return
1155         percentile90 = timings[int(.9 * num_tests)]
1156         percentile99 = timings[int(.99 * num_tests)]
1157
1158         if num_tests % 2 == 1:
1159             median = timings[(num_tests - 1) / 2]
1160         else:
1161             lower = timings[num_tests / 2 - 1]
1162             upper = timings[num_tests / 2]
1163             median = (float(lower + upper)) / 2
1164
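        # Example (hypothetical timings): [1.0, 2.0, 4.0] has median 2.0;
        # [1.0, 2.0, 4.0, 8.0] has median (2.0 + 4.0) / 2 = 3.0.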
1165         mean = sum(timings) / num_tests
1166
1167         sum_of_deviations = 0
1168         for time in timings:
1169             sum_of_deviations += math.pow(time - mean, 2)
1170         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1171         self._printer.print_timing("  Median:          %6.3f" % median)
1172         self._printer.print_timing("  Mean:            %6.3f" % mean)
1173         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1174         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1175         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1176         self._printer.print_timing("")
1177
1178     def _print_result_summary(self, result_summary):
1179         """Print a short summary about how many tests passed.
1180
1181         Args:
1182           result_summary: information to log
1183         """
1184         failed = len(result_summary.failures)
1185         skipped = len(
1186             result_summary.tests_by_expectation[test_expectations.SKIP])
1187         total = result_summary.total
1188         passed = total - failed - skipped
1189         pct_passed = 0.0
1190         if total > 0:
1191             pct_passed = float(passed) * 100 / total
1192
1193         self._printer.print_actual("")
1194         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1195                      (passed, total, pct_passed))
1196         self._printer.print_actual("")
1197         self._print_result_summary_entry(result_summary,
1198             test_expectations.NOW, "Tests to be fixed")
1199
1200         self._printer.print_actual("")
1201         self._print_result_summary_entry(result_summary,
1202             test_expectations.WONTFIX,
1203             "Tests that will only be fixed if they crash (WONTFIX)")
1204         self._printer.print_actual("")
1205
1206     def _print_result_summary_entry(self, result_summary, timeline,
1207                                     heading):
1208         """Print a summary block of results for a particular timeline of test.
1209
1210         Args:
1211           result_summary: summary to print results for
1212           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1213           heading: a textual description of the timeline
1214         """
1215         total = len(result_summary.tests_by_timeline[timeline])
1216         not_passing = (total -
1217            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1218                result_summary.tests_by_timeline[timeline]))
1219         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1220
1221         for result in TestExpectationsFile.EXPECTATION_ORDER:
1222             if result == test_expectations.PASS:
1223                 continue
1224             results = (result_summary.tests_by_expectation[result] &
1225                        result_summary.tests_by_timeline[timeline])
1226             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1227             if not_passing and len(results):
1228                 pct = len(results) * 100.0 / not_passing
1229                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1230                     (len(results), desc[len(results) != 1], pct))
1231
1232     def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
1233         """Builds an HTML page summarizing the failing tests.
1234         test_files: a list of test file paths
1235         failures: a dict mapping test paths to lists of failure objects
1236         title: title printed at the top of the results page
1237         override_time: overrides the current time (used by unit tests)
1238         """
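        # Rough sketch of the markup built below for one failing test (the
        # test name and failure text are hypothetical examples, and the href
        # comes from filename_to_uri()):
        #
        #   <p><a href='(test URI)'>fast/css/example.html</a><br />
        #   &nbsp;&nbsp;(failure.result_html_output() markup)<br/></p>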
1239         page = """<html>
1240   <head>
1241     <title>Layout Test Results (%(time)s)</title>
1242   </head>
1243   <body>
1244     <h2>%(title)s (%(time)s)</h2>
1245         """ % {'title': title, 'time': override_time or time.asctime()}
1246
1247         for test_file in sorted(test_files):
1248             test_name = self._port.relative_test_filename(test_file)
1249             test_url = self._port.filename_to_uri(test_file)
1250             page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
1251             test_failures = failures.get(test_file, [])
1252             for failure in test_failures:
1253                 page += (u"&nbsp;&nbsp;%s<br/>" %
1254                          failure.result_html_output(test_name))
1255             page += "</p>\n"
1256         page += "</body></html>\n"
1257         return page
1258
1259     def _write_results_html_file(self, result_summary):
1260         """Write results.html which is a summary of tests that failed.
1261
1262         Args:
1263           result_summary: a summary of the results :)
1264
1265         Returns:
1266           True if any results were written (since expected failures may be
1267           omitted)
1268         """
1269         # test failures
1270         if self._options.full_results_html:
1271             results_title = "Test Failures"
1272             test_files = result_summary.failures.keys()
1273         else:
1274             results_title = "Unexpected Test Failures"
1275             unexpected_failures = self._get_failures(result_summary,
1276                 include_crashes=True)
1277             test_files = unexpected_failures.keys()
1278         if not len(test_files):
1279             return False
1280
1281         out_filename = os.path.join(self._options.results_directory,
1282                                     "results.html")
1283         with codecs.open(out_filename, "w", "utf-8") as results_file:
1284             html = self._results_html(test_files, result_summary.failures, results_title)
1285             results_file.write(html)
1286
1287         return True
1288
1289     def _show_results_html_file(self):
1290         """Shows the results.html page."""
1291         results_filename = os.path.join(self._options.results_directory,
1292                                         "results.html")
1293         self._port.show_results_html_file(results_filename)
1294
1295
1296 def read_test_files(files):
1297     tests = []
1298     for filename in files:
1299         try:
1300             with codecs.open(filename, 'r', 'utf-8') as file_contents:
1301                 # FIXME: This could be cleaner using a list comprehension.
1302                 for line in file_contents:
1303                     line = test_expectations.strip_comments(line)
1304                     if line:
1305                         tests.append(line)
1306         except IOError, e:
1307             if e.errno == errno.ENOENT:
1308                 _log.critical('')
1309                 _log.critical('--test-list file "%s" not found' % filename)
1310             raise
1311     return tests
1312
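# A --test-list file consumed by read_test_files() above is plain text with
# one test or directory per line; whatever test_expectations.strip_comments()
# recognizes as a comment is removed, and blank lines are skipped.  A minimal
# sketch of such a file (the paths are hypothetical examples):
#
#     fast/css
#     http/tests/misc/will-fail.html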
1313
1314 def run(port, options, args, regular_output=sys.stderr,
1315         buildbot_output=sys.stdout):
1316     """Run the tests.
1317
1318     Args:
1319       port: Port object for port-specific behavior
1320       options: an optparse.Values object holding the command line options
1321       args: a list of sub directories or files to test
1322       regular_output: a stream-like object that we can send logging/debug
1323           output to
1324       buildbot_output: a stream-like object for all output that is intended
1325           to be parsed by the buildbot
1326     Returns:
1327       the number of unexpected results that occurred, or -1 if there is an
1328           error.
1329
1330     """
1331     _set_up_derived_options(port, options)
1332
1333     printer = printing.Printer(port, options, regular_output, buildbot_output,
1334         int(options.child_processes), options.experimental_fully_parallel)
1335     if options.help_printing:
1336         printer.help_printing()
1337         printer.cleanup()
1338         return 0
1339
1340     last_unexpected_results = _gather_unexpected_results(options)
1341     if options.print_last_failures:
1342         printer.write("\n".join(last_unexpected_results) + "\n")
1343         printer.cleanup()
1344         return 0
1345
1346     # We wrap any parts of the run that are slow or likely to raise exceptions
1347     # in a try/finally to ensure that we clean up the logging configuration.
1348     num_unexpected_results = -1
1349     try:
1350         test_runner = TestRunner(port, options, printer)
1351         test_runner._print_config()
1352
1353         printer.print_update("Collecting tests ...")
1354         try:
1355             test_runner.collect_tests(args, last_unexpected_results)
1356         except IOError, e:
1357             if e.errno == errno.ENOENT:
1358                 return -1
1359             raise
1360
1361         printer.print_update("Parsing expectations ...")
1362         if options.lint_test_files:
1363             return test_runner.lint()
1364         test_runner.parse_expectations(port.test_platform_name(),
1365                                        options.configuration == 'Debug')
1366
1367         printer.print_update("Checking build ...")
1368         if not port.check_build(test_runner.needs_http()):
1369             _log.error("Build check failed")
1370             return -1
1371
1372         result_summary = test_runner.set_up_run()
1373         if result_summary:
1374             num_unexpected_results = test_runner.run(result_summary)
1375             test_runner.clean_up_run()
1376             _log.debug("Testing completed, exit status: %d" %
1377                        num_unexpected_results)
1378     finally:
1379         printer.cleanup()
1380
1381     return num_unexpected_results
1382
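
# Minimal sketch (not part of the original script) of driving run()
# programmatically while capturing its regular logging output; the function
# name and the StringIO-backed stream are illustrative assumptions.
def _example_run_with_captured_output(port_obj, options, args):
    """Illustrative only: calls run() with an in-memory regular_output."""
    import StringIO
    log_stream = StringIO.StringIO()
    num_unexpected = run(port_obj, options, args, regular_output=log_stream,
                         buildbot_output=sys.stdout)
    return num_unexpected, log_stream.getvalue()
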
1383
1384 def _set_up_derived_options(port_obj, options):
1385     """Sets the options values that depend on other options values."""
1386
1387     if not options.child_processes:
1388         # FIXME: Investigate perf/flakiness impact of using cpu_count + 1.
1389         options.child_processes = os.environ.get("WEBKIT_TEST_CHILD_PROCESSES",
1390                                                  str(port_obj.default_child_processes()))
1391
1392     if not options.configuration:
1393         options.configuration = port_obj.default_configuration()
1394
1395     if options.pixel_tests is None:
1396         options.pixel_tests = True
1397
1398     if not options.use_apache:
1399         options.use_apache = sys.platform in ('darwin', 'linux2')
1400
1401     if not os.path.isabs(options.results_directory):
1402         # This normalizes the path to the build dir.
1403         # FIXME: how this happens is not at all obvious; this is a dumb
1404         # interface and should be cleaned up.
1405         options.results_directory = port_obj.results_directory()
1406
1407     if not options.time_out_ms:
1408         if options.configuration == "Debug":
1409             options.time_out_ms = str(2 * TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1410         else:
1411             options.time_out_ms = str(TestRunner.DEFAULT_TEST_TIMEOUT_MS)
1412
1413     options.slow_time_out_ms = str(5 * int(options.time_out_ms))
1414
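# Worked example for the timeout derivation above (the constant's value here
# is hypothetical): if TestRunner.DEFAULT_TEST_TIMEOUT_MS were 6000 and no
# --time-out-ms was given, a Debug configuration would end up with
# time_out_ms == "12000" and slow_time_out_ms == "60000", while Release would
# get "6000" and "30000".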
1415
1416 def _gather_unexpected_results(options):
1417     """Returns the unexpected results from the previous run, if any."""
1418     last_unexpected_results = []
1419     if options.print_last_failures or options.retest_last_failures:
1420         unexpected_results_filename = os.path.join(
1421             options.results_directory, "unexpected_results.json")
1422         with codecs.open(unexpected_results_filename, "r", "utf-8") as json_file:
1423             results = simplejson.load(json_file)
1424         last_unexpected_results = results['tests'].keys()
1425     return last_unexpected_results
1426
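# Rough sketch of the only structure _gather_unexpected_results() above relies
# on in unexpected_results.json (the test name is a hypothetical example and
# the per-test value is a placeholder; the real file carries more detail):
#
#     {"tests": {"fast/css/example.html": {}}}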
1427
1428 def _compat_shim_callback(option, opt_str, value, parser):
1429     print "Ignoring unsupported option: %s" % opt_str
1430
1431
1432 def _compat_shim_option(option_name, **kwargs):
1433     return optparse.make_option(option_name, action="callback",
1434         callback=_compat_shim_callback,
1435         help="Ignored, for old-run-webkit-tests compat only.", **kwargs)
1436
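# For example, a shimmed option such as --use-remote-links-to-tests is still
# accepted on the command line, but _compat_shim_callback() only prints
# "Ignoring unsupported option: --use-remote-links-to-tests" and the option
# has no other effect.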
1437
1438 def parse_args(args=None):
1439     """Parses the command line arguments (defaulting to sys.argv[1:]).
1440
1441     Returns a tuple of (options, args) from optparse."""
1442
1443     # FIXME: All of these options should be stored closer to the code which
1444     # FIXME: actually uses them. configuration_options should move
1445     # FIXME: to WebKitPort and be shared across all scripts.
1446     configuration_options = [
1447         optparse.make_option("-t", "--target", dest="configuration",
1448                              help="(DEPRECATED)"),
1449         # FIXME: --help should display which configuration is default.
1450         optparse.make_option('--debug', action='store_const', const='Debug',
1451                              dest="configuration",
1452                              help='Set the configuration to Debug'),
1453         optparse.make_option('--release', action='store_const',
1454                              const='Release', dest="configuration",
1455                              help='Set the configuration to Release'),
1456         # old-run-webkit-tests also accepts -c, --configuration CONFIGURATION.
1457     ]
1458
1459     print_options = printing.print_options()
1460
1461     # FIXME: These options should move onto the ChromiumPort.
1462     chromium_options = [
1463         optparse.make_option("--chromium", action="store_true", default=False,
1464             help="use the Chromium port"),
1465         optparse.make_option("--startup-dialog", action="store_true",
1466             default=False, help="create a dialog on DumpRenderTree startup"),
1467         optparse.make_option("--gp-fault-error-box", action="store_true",
1468             default=False, help="enable Windows GP fault error box"),
1469         optparse.make_option("--multiple-loads",
1470             type="int", help="turn on multiple loads of each test"),
1471         optparse.make_option("--js-flags",
1472             type="string", help="JavaScript flags to pass to tests"),
1473         optparse.make_option("--nocheck-sys-deps", action="store_true",
1474             default=False,
1475             help="Don't check the system dependencies (themes)"),
1476         optparse.make_option("--use-drt", action="store_true",
1477             default=None,
1478             help="Use DumpRenderTree instead of test_shell"),
1479         optparse.make_option("--accelerated-compositing",
1480             action="store_true",
1481             help="Use hardware-accelerated compositing for rendering"),
1482         optparse.make_option("--no-accelerated-compositing",
1483             action="store_false",
1484             dest="accelerated_compositing",
1485             help="Don't use hardware-accelerated compositing for rendering"),
1486         optparse.make_option("--accelerated-2d-canvas",
1487             action="store_true",
1488             help="Use hardware-accelerated 2D Canvas calls"),
1489         optparse.make_option("--no-accelerated-2d-canvas",
1490             action="store_false",
1491             dest="accelerated_2d_canvas",
1492             help="Don't use hardware-accelerated 2D Canvas calls"),
1493     ]
1494
1495     # Missing Mac-specific old-run-webkit-tests options:
1496     # FIXME: Need: -g, --guard for guard malloc support on Mac.
1497     # FIXME: Need: -l --leaks    Enable leaks checking.
1498     # FIXME: Need: --sample-on-timeout Run sample on timeout
1499
1500     old_run_webkit_tests_compat = [
1501         # NRWT doesn't generate results by default anyway.
1502         _compat_shim_option("--no-new-test-results"),
1503         # NRWT doesn't sample on timeout yet anyway.
1504         _compat_shim_option("--no-sample-on-timeout"),
1505         # FIXME: NRWT needs to support remote links eventually.
1506         _compat_shim_option("--use-remote-links-to-tests"),
1507         # FIXME: NRWT doesn't need this option as much since failures are
1508         # designed to be cheap.  We eventually plan to add this support.
1509         _compat_shim_option("--exit-after-n-failures", nargs=1, type="int"),
1510     ]
1511
1512     results_options = [
1513         # NEED for bots: --use-remote-links-to-tests Link to test files
1514         # within the SVN repository in the results.
1515         optparse.make_option("-p", "--pixel-tests", action="store_true",
1516             dest="pixel_tests", help="Enable pixel-to-pixel PNG comparisons"),
1517         optparse.make_option("--no-pixel-tests", action="store_false",
1518             dest="pixel_tests", help="Disable pixel-to-pixel PNG comparisons"),
1519         optparse.make_option("--tolerance",
1520             help="Ignore image differences less than this percentage (some "
1521                 "ports may ignore this option)", type="float"),
1522         optparse.make_option("--results-directory",
1523             default="layout-test-results",
1524             help="Output results directory, relative to the Debug or "
1525                  "Release build directory"),
1526         optparse.make_option("--new-baseline", action="store_true",
1527             default=False, help="Save all generated results as new baselines "
1528                  "into the platform directory, overwriting whatever's "
1529                  "already there."),
1530         optparse.make_option("--reset-results", action="store_true",
1531             default=False, help="Reset any existing baselines to the "
1532                  "generated results"),
1533         optparse.make_option("--no-show-results", action="store_false",
1534             default=True, dest="show_results",
1535             help="Don't launch a browser with results after the tests "
1536                  "are done"),
1537         # FIXME: We should have a helper function to do this sort of
1538         # deprecated mapping and automatically log, etc.
1539         optparse.make_option("--noshow-results", action="store_false",
1540             dest="show_results",
1541             help="Deprecated, same as --no-show-results."),
1542         optparse.make_option("--no-launch-safari", action="store_false",
1543             dest="show_results",
1544             help="old-run-webkit-tests compat, same as --noshow-results."),
1545         # old-run-webkit-tests:
1546         # --[no-]launch-safari    Launch (or do not launch) Safari to display
1547         #                         test results (default: launch)
1548         optparse.make_option("--full-results-html", action="store_true",
1549             default=False,
1550             help="Show all failures in results.html, rather than only "
1551                  "regressions"),
1552         optparse.make_option("--clobber-old-results", action="store_true",
1553             default=False, help="Clobbers test results from previous runs."),
1554         optparse.make_option("--platform",
1555             help="Override the platform for expected results"),
1556         optparse.make_option("--no-record-results", action="store_false",
1557             default=True, dest="record_results",
1558             help="Don't record the results."),
1559         # old-run-webkit-tests also has HTTP toggle options:
1560         # --[no-]http                     Run (or do not run) http tests
1561         #                                 (default: run)
1562     ]
1563
1564     test_options = [
1565         optparse.make_option("--build", dest="build",
1566             action="store_true", default=True,
1567             help="Check to ensure the DumpRenderTree build is up-to-date "
1568                  "(default)."),
1569         optparse.make_option("--no-build", dest="build",
1570             action="store_false", help="Don't check to see if the "
1571                                        "DumpRenderTree build is up-to-date."),
1572         # old-run-webkit-tests has --valgrind instead of wrapper.
1573         optparse.make_option("--wrapper",
1574             help="wrapper command to insert before invocations of "
1575                  "DumpRenderTree; option is split on whitespace before "
1576                  "running. (Example: --wrapper='valgrind --smc-check=all')"),
1577         # old-run-webkit-tests:
1578         # -i|--ignore-tests               Comma-separated list of directories
1579         #                                 or tests to ignore
1580         optparse.make_option("--test-list", action="append",
1581             help="read list of tests to run from file", metavar="FILE"),
1582         # old-run-webkit-tests uses --skipped=[default|ignore|only]
1583         # instead of --force:
1584         optparse.make_option("--force", action="store_true", default=False,
1585             help="Run all tests, even those marked SKIP in the test list"),
1586         optparse.make_option("--use-apache", action="store_true",
1587             default=False, help="Use Apache instead of lighttpd."),
1588         optparse.make_option("--time-out-ms",
1589             help="Set the timeout for each test"),
1590         # old-run-webkit-tests calls --randomize-order: --random
1591         optparse.make_option("--randomize-order", action="store_true",
1592             default=False, help=("Run tests in random order (useful "
1593                                 "for tracking down corruption)")),
1594         optparse.make_option("--run-chunk",
1595             help=("Run a specified chunk (n:l), the nth chunk of length l, "
1596                  "of the layout tests")),
1597         optparse.make_option("--run-part", help=("Run a specified part (n:m), "
1598                   "the nth of m parts, of the layout tests")),
1599         # old-run-webkit-tests calls --batch-size: --nthly n
1600         #   Restart DumpRenderTree every n tests (default: 1000)
1601         optparse.make_option("--batch-size",
1602             help=("Run the tests in batches of n; DumpRenderTree is "
1603                   "relaunched after every n tests."), type="int", default=0),
1604         # old-run-webkit-tests calls --run-singly: -1|--singly
1605         # Isolate each test case run (implies --nthly 1 --verbose)
1606         optparse.make_option("--run-singly", action="store_true",
1607             default=False, help="run a separate DumpRenderTree for each test"),
1608         optparse.make_option("--child-processes",
1609             help="Number of DumpRenderTrees to run in parallel."),
1610         # FIXME: Display default number of child processes that will run.
1611         optparse.make_option("--experimental-fully-parallel",
1612             action="store_true", default=False,
1613             help="run all tests in parallel"),
1614         # FIXME: Need --exit-after-n-failures N
1615         #      Exit after the first N failures instead of running all tests
1616         # FIXME: Need --exit-after-n-crashes N
1617         #      Exit after the first N crashes instead of running all tests
1618         # FIXME: consider: --iterations n
1619         #      Number of times to run the set of tests (e.g. ABCABCABC)
1620         optparse.make_option("--print-last-failures", action="store_true",
1621             default=False, help="Print the tests in the last run that "
1622             "had unexpected failures (or passes)."),
1623         optparse.make_option("--retest-last-failures", action="store_true",
1624             default=False, help="re-test the tests in the last run that "
1625             "had unexpected failures (or passes)."),
1626         optparse.make_option("--retry-failures", action="store_true",
1627             default=True,
1628             help="Re-try any tests that produce unexpected results (default)"),
1629         optparse.make_option("--no-retry-failures", action="store_false",
1630             dest="retry_failures",
1631             help="Don't re-try any tests that produce unexpected results."),
1632     ]
1633
1634     misc_options = [
1635         optparse.make_option("--lint-test-files", action="store_true",
1636             default=False, help=("Makes sure the test files parse for all "
1637                                  "configurations. Does not run any tests.")),
1638     ]
1639
1640     # FIXME: Move these into json_results_generator.py
1641     results_json_options = [
1642         optparse.make_option("--master-name", help="The name of the buildbot master."),
1643         optparse.make_option("--builder-name", default="DUMMY_BUILDER_NAME",
1644             help=("The name of the builder shown on the waterfall running "
1645                   "this script e.g. WebKit.")),
1646         optparse.make_option("--build-name", default="DUMMY_BUILD_NAME",
1647             help=("The name of the builder used in its path, e.g. "
1648                   "webkit-rel.")),
1649         optparse.make_option("--build-number", default="DUMMY_BUILD_NUMBER",
1650             help=("The build number of the builder running this script.")),
1651         optparse.make_option("--test-results-server", default="",
1652             help=("If specified, upload results json files to this appengine "
1653                   "server.")),
1654         optparse.make_option("--upload-full-results",
1655             action="store_true",
1656             default=False,
1657             help="If true, upload full json results to server."),
1658     ]
1659
1660     option_list = (configuration_options + print_options +
1661                    chromium_options + results_options + test_options +
1662                    misc_options + results_json_options +
1663                    old_run_webkit_tests_compat)
1664     option_parser = optparse.OptionParser(option_list=option_list)
1665
1666     options, args = option_parser.parse_args(args)
1667
1668     return options, args
1669
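# Illustrative sketch (not part of the original script): parse_args() also
# accepts an explicit argument list, which is how a unit test might drive it.
# The test path is a hypothetical example; the flags are options defined above.
#
#     options, args = parse_args(['--noshow-results', '--child-processes', '2',
#                                 'fast/css/example.html'])
#     # args == ['fast/css/example.html'], options.child_processes == '2'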
1670
1671
1672 def main():
1673     options, args = parse_args()
1674     port_obj = port.get(options.platform, options)
1675     return run(port_obj, options, args)
1676
1677 if '__main__' == __name__:
1678     try:
1679         sys.exit(main())
1680     except KeyboardInterrupt:
1681         # this mirrors what the shell normally does
1682         sys.exit(signal.SIGINT + 128)