1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """
32 The TestRunner class runs a series of tests (TestType interface) against a set
33 of test files.  If a test file fails a TestType, it returns a list of
34 TestFailure objects to the TestRunner, which aggregates the TestFailures to
35 create a final report.
36 """
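# Illustrative sketch (not part of the module): a driver such as
# run_webkit_tests.py typically exercises this class roughly as follows,
# where port, options and printer are supplied by the caller:
#
#     runner = TestRunner(port, options, printer)
#     runner.collect_tests(args, last_unexpected_results=[])
#     runner.parse_expectations(port.test_platform_name(),
#                               options.configuration == 'Debug')
#     result_summary = runner.set_up_run()
#     if result_summary:
#         num_unexpected = runner.run(result_summary)
#         runner.clean_up_run()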
37
38 from __future__ import with_statement
39
40 import codecs
41 import errno
42 import logging
43 import math
44 import os
45 import Queue
46 import random
47 import shutil
48 import sys
49 import time
50
51 from result_summary import ResultSummary
52 from test_input import TestInput
53
54 import dump_render_tree_thread
55 import json_layout_results_generator
56 import message_broker
57 import printing
58 import test_expectations
59 import test_failures
60 import test_results
61 import test_results_uploader
62
63 from webkitpy.thirdparty import simplejson
64 from webkitpy.tool import grammar
65
66 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
67
68 # Builder base URL where we have the archived test results.
69 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
70
71 LAYOUT_TESTS_DIRECTORY = "LayoutTests" + os.sep
72
73 TestExpectationsFile = test_expectations.TestExpectationsFile
74
75
76 def summarize_unexpected_results(port_obj, expectations, result_summary,
77                                  retry_summary):
78     """Summarize any unexpected results as a dict.
79
80     FIXME: split this data structure into a separate class?
81
82     Args:
83         port_obj: interface to port-specific hooks
84         expectations: test_expectations.TestExpectations object
85         result_summary: summary object from initial test runs
86         retry_summary: summary object from final test run of retried tests
87     Returns:
88         A dictionary containing a summary of the unexpected results from the
89         run, with the following fields:
90         'version': a version indicator (1 in this version)
91         'fixable': # of fixable tests (NOW - PASS)
92         'skipped': # of skipped tests (NOW & SKIPPED)
93         'num_regressions': # of non-flaky failures
94         'num_flaky': # of flaky failures
95         'num_passes': # of unexpected passes
96         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
97     """
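    # Illustrative shape of the dict built below (names and counts are made
    # up; the actual keyword strings come from
    # TestExpectationsFile.EXPECTATIONS):
    #
    #     {'version': 1, 'fixable': 1200, 'skipped': 300, 'num_passes': 2,
    #      'num_flaky': 1, 'num_regressions': 3,
    #      'tests': {'fast/forms/foo.html':
    #                    {'expected': 'PASS', 'actual': 'TEXT'}}}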
98     results = {}
99     results['version'] = 1
100
101     tbe = result_summary.tests_by_expectation
102     tbt = result_summary.tests_by_timeline
103     results['fixable'] = len(tbt[test_expectations.NOW] -
104                                 tbe[test_expectations.PASS])
105     results['skipped'] = len(tbt[test_expectations.NOW] &
106                                 tbe[test_expectations.SKIP])
107
108     num_passes = 0
109     num_flaky = 0
110     num_regressions = 0
111     keywords = {}
112     for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
113         keywords[v] = k.upper()
114
115     tests = {}
116     for filename, result in result_summary.unexpected_results.iteritems():
117         # Note that if a test crashed in the original run, we ignore
118         # whether or not it crashed when we retried it (if we retried it),
119         # and always consider the result not flaky.
120         test = port_obj.relative_test_filename(filename)
121         expected = expectations.get_expectations_string(filename)
122         actual = [keywords[result]]
123
124         if result == test_expectations.PASS:
125             num_passes += 1
126         elif result == test_expectations.CRASH:
127             num_regressions += 1
128         else:
129             if filename not in retry_summary.unexpected_results:
130                 actual.extend(expectations.get_expectations_string(
131                     filename).split(" "))
132                 num_flaky += 1
133             else:
134                 retry_result = retry_summary.unexpected_results[filename]
135                 if result != retry_result:
136                     actual.append(keywords[retry_result])
137                     num_flaky += 1
138                 else:
139                     num_regressions += 1
140
141         tests[test] = {}
142         tests[test]['expected'] = expected
143         tests[test]['actual'] = " ".join(actual)
144
145     results['tests'] = tests
146     results['num_passes'] = num_passes
147     results['num_flaky'] = num_flaky
148     results['num_regressions'] = num_regressions
149
150     return results
151
152
153 class TestRunInterruptedException(Exception):
154     """Raised when a test run should be stopped immediately."""
155     def __init__(self, reason):
156         self.reason = reason
157
158
159 class TestRunner:
160     """A class for managing the running of a series of tests on a set of
161     layout test files."""
162
163     HTTP_SUBDIR = os.sep.join(['', 'http', ''])
164     WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])
165
166     # The per-test timeout in milliseconds, if no --time-out-ms option was
167     # given to run_webkit_tests. This should correspond to the default timeout
168     # in DumpRenderTree.
169     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
170
171     def __init__(self, port, options, printer):
172         """Initialize test runner data structures.
173
174         Args:
175           port: an object implementing port-specific hooks
176           options: a dictionary of command line options
177           printer: a Printer object to record updates to.
178         """
179         self._port = port
180         self._options = options
181         self._printer = printer
182         self._message_broker = None
183
184         # The wss server is disabled until pyOpenSSL is installed on the buildbots.
185         # self._websocket_secure_server = websocket_server.PyWebSocket(
186         #        options.results_directory, use_tls=True, port=9323)
187
188         # a set of test files, and the same tests as a list
189         self._test_files = set()
190         self._test_files_list = None
191         self._result_queue = Queue.Queue()
192         self._retrying = False
193
194     def collect_tests(self, args, last_unexpected_results):
195         """Find all the files to test.
196
197         Args:
198           args: list of test arguments from the command line
199           last_unexpected_results: list of unexpected results to retest, if any
200
201         """
202         paths = self._strip_test_dir_prefixes(args)
203         paths += last_unexpected_results
204         if self._options.test_list:
205             paths += self._strip_test_dir_prefixes(read_test_files(self._options.test_list))
206         self._test_files = self._port.tests(paths)
207
208     def _strip_test_dir_prefixes(self, paths):
209         return [self._strip_test_dir_prefix(path) for path in paths if path]
210
211     def _strip_test_dir_prefix(self, path):
212         if path.startswith(LAYOUT_TESTS_DIRECTORY):
213             return path[len(LAYOUT_TESTS_DIRECTORY):]
214         return path
215
216     def lint(self):
217         lint_failed = False
218
219         # Creating the expectations for each platform/configuration pair does
220         # all the test list parsing and ensures the syntax is correct (e.g. no
221         # dupes).
222         for platform_name in self._port.test_platform_names():
223             try:
224                 self.parse_expectations(platform_name, is_debug_mode=True)
225             except test_expectations.ParseError:
226                 lint_failed = True
227             try:
228                 self.parse_expectations(platform_name, is_debug_mode=False)
229             except test_expectations.ParseError:
230                 lint_failed = True
231
232         self._printer.write("")
233         if lint_failed:
234             _log.error("Lint failed.")
235             return -1
236
237         _log.info("Lint succeeded.")
238         return 0
239
240     def parse_expectations(self, test_platform_name, is_debug_mode):
241         """Parse the expectations from the test_list files and return a data
242         structure holding them. Throws an error if the test_list files have
243         invalid syntax."""
244         if self._options.lint_test_files:
245             test_files = None
246         else:
247             test_files = self._test_files
248
249         expectations_str = self._port.test_expectations()
250         overrides_str = self._port.test_expectations_overrides()
251         self._expectations = test_expectations.TestExpectations(
252             self._port, test_files, expectations_str, test_platform_name,
253             is_debug_mode, self._options.lint_test_files,
254             overrides=overrides_str)
255         return self._expectations
256
257     # FIXME: This method is way too long and needs to be broken into pieces.
258     def prepare_lists_and_print_output(self):
259         """Creates appropriate subsets of the test lists and returns a
260         ResultSummary object. Also prints expected test counts.
261         """
262
263         # Remove skipped - both fixable and ignored - files from the
264         # top-level list of files to test.
265         num_all_test_files = len(self._test_files)
266         self._printer.print_expected("Found:  %d tests" %
267                                      (len(self._test_files)))
268         if not num_all_test_files:
269             _log.critical('No tests to run.')
270             return None
271
272         skipped = set()
273         if num_all_test_files > 1 and not self._options.force:
274             skipped = self._expectations.get_tests_with_result_type(
275                            test_expectations.SKIP)
276             self._test_files -= skipped
277
278         # Create a sorted list of test files so the subset chunk,
279         # if used, contains alphabetically consecutive tests.
280         self._test_files_list = list(self._test_files)
281         if self._options.randomize_order:
282             random.shuffle(self._test_files_list)
283         else:
284             self._test_files_list.sort()
285
286         # If the user specifies they just want to run a subset of the tests,
287         # just grab a subset of the non-skipped tests.
288         if self._options.run_chunk or self._options.run_part:
289             chunk_value = self._options.run_chunk or self._options.run_part
290             test_files = self._test_files_list
291             try:
292                 (chunk_num, chunk_len) = chunk_value.split(":")
293                 chunk_num = int(chunk_num)
294                 assert(chunk_num >= 0)
295                 test_size = int(chunk_len)
296                 assert(test_size > 0)
297             except (ValueError, AssertionError):
298                 _log.critical("invalid chunk '%s'" % chunk_value)
299                 return None
300
301             # Get the number of tests
302             num_tests = len(test_files)
303
304             # Get the start offset of the slice.
305             if self._options.run_chunk:
306                 chunk_len = test_size
307                 # In this case chunk_num can be really large. We need
308                 # to make the slice fit within the current number of tests.
309                 slice_start = (chunk_num * chunk_len) % num_tests
310             else:
311                 # Validate the data.
312                 assert(test_size <= num_tests)
313                 assert(chunk_num <= test_size)
314
315                 # To compute chunk_len without skipping any tests, we round
316                 # num_tests up to the next multiple of test_size so that it
317                 # divides evenly among all the parts.
318                 rounded_tests = num_tests
319                 if rounded_tests % test_size != 0:
320                     rounded_tests = (num_tests + test_size -
321                                      (num_tests % test_size))
322
323                 chunk_len = rounded_tests / test_size
324                 slice_start = chunk_len * (chunk_num - 1)
325                 # It is fine if this runs past num_tests; slice_end is clamped below.
326
327             # Get the end offset of the slice.
328             slice_end = min(num_tests, slice_start + chunk_len)
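            # Worked example (illustrative numbers, not from a real run):
            # with --run-part=2:3 and num_tests=10, rounded_tests is 12,
            # chunk_len is 4, slice_start is 4 and slice_end is 8, so this
            # part runs tests[4:8]; part 3 would run tests[8:10] and then
            # wrap around to tests[0:2] via the "partial chunk" branch below.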
329
330             files = test_files[slice_start:slice_end]
331
332             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
333                 (slice_end - slice_start), slice_start, slice_end, num_tests)
334             self._printer.print_expected(tests_run_msg)
335
336             # If we reached the end and we don't have enough tests, we run some
337             # from the beginning.
338             if slice_end - slice_start < chunk_len:
339                 extra = chunk_len - (slice_end - slice_start)
340                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
341                             extra)
342                 self._printer.print_expected(extra_msg)
343                 tests_run_msg += "\n" + extra_msg
344                 files.extend(test_files[0:extra])
345             tests_run_filename = os.path.join(self._options.results_directory,
346                                               "tests_run.txt")
347             with codecs.open(tests_run_filename, "w", "utf-8") as file:
348                 file.write(tests_run_msg + "\n")
349
350             len_skip_chunk = int(len(files) * len(skipped) /
351                                  float(len(self._test_files)))
352             skip_chunk_list = list(skipped)[0:len_skip_chunk]
353             skip_chunk = set(skip_chunk_list)
354
355             # Update expectations so that the stats are calculated correctly.
356             # We need to pass a list that includes the right # of skipped files
357             # to parse_expectations() so that ResultSummary() will get the correct
358             # stats. So, we add in the subset of skipped files, and then
359             # subtract them back out.
360             self._test_files_list = files + skip_chunk_list
361             self._test_files = set(self._test_files_list)
362
363             self._expectations = self.parse_expectations(
364                 self._port.test_platform_name(),
365                 self._options.configuration == 'Debug')
366
367             self._test_files = set(files)
368             self._test_files_list = files
369         else:
370             skip_chunk = skipped
371
372         result_summary = ResultSummary(self._expectations,
373             self._test_files | skip_chunk)
374         self._print_expected_results_of_type(result_summary,
375             test_expectations.PASS, "passes")
376         self._print_expected_results_of_type(result_summary,
377             test_expectations.FAIL, "failures")
378         self._print_expected_results_of_type(result_summary,
379             test_expectations.FLAKY, "flaky")
380         self._print_expected_results_of_type(result_summary,
381             test_expectations.SKIP, "skipped")
382
383         if self._options.force:
384             self._printer.print_expected('Running all tests, including '
385                                          'skips (--force)')
386         else:
387             # Note that we don't actually run the skipped tests (they were
388             # subtracted out of self._test_files, above), but we stub out the
389             # results here so the statistics can remain accurate.
390             for test in skip_chunk:
391                 result = test_results.TestResult(test)
392                 result.type = test_expectations.SKIP
393                 result_summary.add(result, expected=True)
394         self._printer.print_expected('')
395
396         # Check to make sure we didn't filter out all of the tests.
397         if not len(self._test_files):
398             _log.info("All tests are being skipped")
399             return None
400
401         return result_summary
402
403     def _get_dir_for_test_file(self, test_file):
404         """Returns the highest-level directory by which to shard the given
405         test file."""
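        # Illustrative examples (paths are hypothetical): on Mac/Linux,
        # ".../LayoutTests/fast/forms/foo.html" shards as "fast/forms",
        # while on Windows ".../LayoutTests/http/tests/misc/foo.html"
        # shards as just "http", since http tests are not sharded there
        # (see the comment below).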
406         index = test_file.rfind(os.sep + LAYOUT_TESTS_DIRECTORY)
407
408         test_file = test_file[index + len(LAYOUT_TESTS_DIRECTORY):]
409         test_file_parts = test_file.split(os.sep, 1)
410         directory = test_file_parts[0]
411         test_file = test_file_parts[1]
412
413         # The http tests are very stable on mac/linux.
414         # TODO(ojan): Make the http server on Windows be apache so we can
415         # shard the http tests there as well. Switching to apache is
416         # what made them stable on linux/mac.
417         return_value = directory
418         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
419                 and test_file.find(os.sep) >= 0):
420             test_file_parts = test_file.split(os.sep, 1)
421             directory = test_file_parts[0]
422             return_value = os.path.join(return_value, directory)
423             test_file = test_file_parts[1]
424
425         return return_value
426
427     def _get_test_input_for_file(self, test_file):
428         """Returns the appropriate TestInput object for the file. Mostly this
429         is used for looking up the timeout value (in ms) to use for the given
430         test."""
431         if self._test_is_slow(test_file):
432             return TestInput(test_file, self._options.slow_time_out_ms)
433         return TestInput(test_file, self._options.time_out_ms)
434
435     def _test_requires_lock(self, test_file):
436         """Return True if the test needs to be locked when
437         running multiple copies of NRWTs."""
438         split_path = test_file.split(os.sep)
439         return 'http' in split_path or 'websocket' in split_path
440
441     def _test_is_slow(self, test_file):
442         return self._expectations.has_modifier(test_file,
443                                                test_expectations.SLOW)
444
445     def _shard_tests(self, test_files, use_real_shards):
446         """Groups tests into batches.
447         This helps ensure that tests that depend on each other (aka bad tests!)
448         continue to run together, as most cross-test dependencies tend to
449         occur within the same directory. If use_real_shards is False, we
450         put each (non-HTTP/websocket) test into its own shard for maximum
451         concurrency instead of trying to do any sort of real sharding.
452
453         Return:
454             A list of (directory, [TestInput, ...]) tuples.
455         """
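        # Illustrative shape of the return value (directory names are
        # hypothetical):
        #
        #     [("tests_to_http_lock", [TestInput, ...]),
        #      ("fast/forms", [TestInput, ...]),
        #      ("svg/custom", [TestInput, ...])]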
456         # FIXME: when we added http locking, we changed how this works such
457         # that we always lump all of the HTTP threads into a single shard.
458         # That will slow down experimental-fully-parallel, but it's unclear
459         # what the best alternative is, short of completely revamping how we
460         # track when to grab the lock.
461
462         test_lists = []
463         tests_to_http_lock = []
464         if not use_real_shards:
465             for test_file in test_files:
466                 test_input = self._get_test_input_for_file(test_file)
467                 if self._test_requires_lock(test_file):
468                     tests_to_http_lock.append(test_input)
469                 else:
470                     test_lists.append((".", [test_input]))
471         else:
472             tests_by_dir = {}
473             for test_file in test_files:
474                 directory = self._get_dir_for_test_file(test_file)
475                 test_input = self._get_test_input_for_file(test_file)
476                 if self._test_requires_lock(test_file):
477                     tests_to_http_lock.append(test_input)
478                 else:
479                     tests_by_dir.setdefault(directory, [])
480                     tests_by_dir[directory].append(test_input)
481             # Sort by the number of tests in the dir so that the ones with the
482             # most tests get run first in order to maximize parallelization.
483             # Number of tests is a good enough, but not perfect, approximation
484             # of how long that set of tests will take to run. We can't just use
485             # a PriorityQueue until we move to Python 2.6.
486             for directory in tests_by_dir:
487                 test_list = tests_by_dir[directory]
488                 # Keep the tests in alphabetical order.
489                 # FIXME: Remove once tests are fixed so they can be run in any
490                 # order.
491                 test_list.reverse()
492                 test_list_tuple = (directory, test_list)
493                 test_lists.append(test_list_tuple)
494             test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
495
496         # Put the http tests first. There are only a couple hundred of them,
497         # but each http test takes a very long time to run, so sorting by the
498         # number of tests doesn't accurately capture how long they take to run.
499         if tests_to_http_lock:
500             tests_to_http_lock.reverse()
501             test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
502
503         return test_lists
504
505     def _contains_tests(self, subdir):
506         for test_file in self._test_files:
507             if test_file.find(subdir) >= 0:
508                 return True
509         return False
510
511     def _num_workers(self):
512         return int(self._options.child_processes)
513
514     def _run_tests(self, file_list, result_summary):
515         """Runs the tests in the file_list.
516
517         result_summary (an argument) is populated in place with the results.
518
519         Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
520             test_timings, individual_test_timings)
521             interrupted is whether the run was interrupted
522             keyboard_interrupted is whether someone typed Ctrl-C
523             thread_timings is a list of dicts with the total runtime of each
524               thread, with 'name', 'num_tests' and 'total_time' keys
525             test_timings is a dict of timings for each sharded subdirectory,
526               mapping the directory name to (num_tests, time)
527             individual_test_timings is a list of the TestResult objects
528               produced for each test that was run
529         """
530
531         self._printer.print_update('Sharding tests ...')
532         num_workers = self._num_workers()
533         test_lists = self._shard_tests(file_list,
534             num_workers > 1 and not self._options.experimental_fully_parallel)
535         filename_queue = Queue.Queue()
536         for item in test_lists:
537             filename_queue.put(item)
538
539         self._printer.print_update('Starting %s ...' %
540                                    grammar.pluralize('worker', num_workers))
541         self._message_broker = message_broker.get(self._port, self._options)
542         broker = self._message_broker
543         self._current_filename_queue = filename_queue
544         self._current_result_summary = result_summary
545
546         if not self._options.dry_run:
547             threads = broker.start_workers(self)
548         else:
549             threads = {}
550
551         self._printer.print_update("Starting testing ...")
552         keyboard_interrupted = False
553         interrupted = False
554         if not self._options.dry_run:
555             try:
556                 broker.run_message_loop()
557             except KeyboardInterrupt:
558                 _log.info("Interrupted, exiting")
559                 broker.cancel_workers()
560                 keyboard_interrupted = True
561                 interrupted = True
562             except TestRunInterruptedException, e:
563                 _log.info(e.reason)
564                 broker.cancel_workers()
565                 interrupted = True
566             except:
567                 # Unexpected exception; don't try to clean up workers.
568                 _log.info("Exception raised, exiting")
569                 raise
570
571         thread_timings, test_timings, individual_test_timings = \
572             self._collect_timing_info(threads)
573
574         broker.cleanup()
575         self._message_broker = None
576         return (interrupted, keyboard_interrupted, thread_timings, test_timings,
577                 individual_test_timings)
578
579     def update(self):
580         self.update_summary(self._current_result_summary)
581
582     def _collect_timing_info(self, threads):
583         test_timings = {}
584         individual_test_timings = []
585         thread_timings = []
586
587         for thread in threads:
588             thread_timings.append({'name': thread.getName(),
589                                    'num_tests': thread.get_num_tests(),
590                                    'total_time': thread.get_total_time()})
591             test_timings.update(thread.get_test_group_timing_stats())
592             individual_test_timings.extend(thread.get_test_results())
593
594         return (thread_timings, test_timings, individual_test_timings)
595
596     def needs_http(self):
597         """Returns whether the test runner needs an HTTP server."""
598         return self._contains_tests(self.HTTP_SUBDIR)
599
600     def needs_websocket(self):
601         """Returns whether the test runner needs a WebSocket server."""
602         return self._contains_tests(self.WEBSOCKET_SUBDIR)
603
604     def set_up_run(self):
605         """Configures the system to be ready to run tests.
606
607         Returns a ResultSummary object if we should continue to run tests,
608         or None if we should abort.
609
610         """
611         # This must be started before we check the system dependencies,
612         # since the helper may do things to make the setup correct.
613         self._printer.print_update("Starting helper ...")
614         self._port.start_helper()
615
616         # Check that the system dependencies (themes, fonts, ...) are correct.
617         if not self._options.nocheck_sys_deps:
618             self._printer.print_update("Checking system dependencies ...")
619             if not self._port.check_sys_deps(self.needs_http()):
620                 self._port.stop_helper()
621                 return None
622
623         if self._options.clobber_old_results:
624             self._clobber_old_results()
625
626         # Create the output directory if it doesn't already exist.
627         self._port.maybe_make_directory(self._options.results_directory)
628
629         self._port.setup_test_run()
630
631         self._printer.print_update("Preparing tests ...")
632         result_summary = self.prepare_lists_and_print_output()
633         if not result_summary:
634             return None
635
636         return result_summary
637
638     def run(self, result_summary):
639         """Run all our tests on all our test files.
640
641         For each test file, we run each test type. If there are any failures,
642         we collect them for reporting.
643
644         Args:
645           result_summary: a summary object tracking the test results.
646
647         Return:
648           The number of unexpected results (0 == success)
649         """
650         # collect_tests() must have been called first to initialize us.
651         # If we didn't find any files to test, we've errored out already in
652         # prepare_lists_and_print_output().
653         assert(len(self._test_files))
654
655         start_time = time.time()
656
657         interrupted, keyboard_interrupted, thread_timings, test_timings, \
658             individual_test_timings = (
659             self._run_tests(self._test_files_list, result_summary))
660
661         # We exclude the crashes from the list of results to retry, because
662         # we want to treat even a potentially flaky crash as an error.
663         failures = self._get_failures(result_summary, include_crashes=False)
664         retry_summary = result_summary
665         while (len(failures) and self._options.retry_failures and
666             not self._retrying and not interrupted):
667             _log.info('')
668             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
669             _log.info('')
670             self._retrying = True
671             retry_summary = ResultSummary(self._expectations, failures.keys())
672             # Note that we intentionally ignore the return value here.
673             self._run_tests(failures.keys(), retry_summary)
674             failures = self._get_failures(retry_summary, include_crashes=True)
675
676         end_time = time.time()
677
678         self._print_timing_statistics(end_time - start_time,
679                                       thread_timings, test_timings,
680                                       individual_test_timings,
681                                       result_summary)
682
683         self._print_result_summary(result_summary)
684
685         sys.stdout.flush()
686         sys.stderr.flush()
687
688         self._printer.print_one_line_summary(result_summary.total,
689                                              result_summary.expected,
690                                              result_summary.unexpected)
691
692         unexpected_results = summarize_unexpected_results(self._port,
693             self._expectations, result_summary, retry_summary)
694         self._printer.print_unexpected_results(unexpected_results)
695
696         if (self._options.record_results and not self._options.dry_run and
697             not interrupted):
698             # Write the same data to log files and upload generated JSON files
699             # to the AppEngine server.
700             self._upload_json_files(unexpected_results, result_summary,
701                                     individual_test_timings)
702
703         # Write the summary to disk (results.html) and display it if requested.
704         if not self._options.dry_run:
705             wrote_results = self._write_results_html_file(result_summary)
706             if self._options.show_results and wrote_results:
707                 self._show_results_html_file()
708
709         # Now that we've completed all the processing we can, we re-raise
710         # a KeyboardInterrupt if necessary so the caller can handle it.
711         if keyboard_interrupted:
712             raise KeyboardInterrupt
713
714         # Ignore flaky failures and unexpected passes so we don't turn the
715         # bot red for those.
716         return unexpected_results['num_regressions']
717
718     def clean_up_run(self):
719         """Restores the system after we're done running tests."""
720
721         _log.debug("flushing stdout")
722         sys.stdout.flush()
723         _log.debug("flushing stderr")
724         sys.stderr.flush()
725         _log.debug("stopping helper")
726         self._port.stop_helper()
727
728     def update_summary(self, result_summary):
729         """Update the summary and print results with any completed tests."""
730         while True:
731             try:
732                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
733             except Queue.Empty:
734                 return
735
736             expected = self._expectations.matches_an_expected_result(
737                 result.filename, result.type, self._options.pixel_tests)
738             result_summary.add(result, expected)
739             exp_str = self._expectations.get_expectations_string(
740                 result.filename)
741             got_str = self._expectations.expectation_to_string(result.type)
742             self._printer.print_test_result(result, expected, exp_str, got_str)
743             self._printer.print_progress(result_summary, self._retrying,
744                                          self._test_files_list)
745
746             def interrupt_if_at_failure_limit(limit, count, message):
747                 if limit and count >= limit:
748                     raise TestRunInterruptedException(message % count)
749
750             interrupt_if_at_failure_limit(
751                 self._options.exit_after_n_failures,
752                 result_summary.unexpected_failures,
753                 "Aborting run since %d failures were reached")
754             interrupt_if_at_failure_limit(
755                 self._options.exit_after_n_crashes_or_timeouts,
756                 result_summary.unexpected_crashes_or_timeouts,
757                 "Aborting run since %d crashes or timeouts were reached")
758
759     def _clobber_old_results(self):
760         # Just clobber the actual test results directories since the other
761         # files in the results directory are explicitly used for cross-run
762         # tracking.
763         self._printer.print_update("Clobbering old results in %s" %
764                                    self._options.results_directory)
765         layout_tests_dir = self._port.layout_tests_dir()
766         possible_dirs = self._port.test_dirs()
767         for dirname in possible_dirs:
768             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
769                 shutil.rmtree(os.path.join(self._options.results_directory,
770                                            dirname),
771                               ignore_errors=True)
772
773     def _get_failures(self, result_summary, include_crashes):
774         """Filters a dict of results and returns only the failures.
775
776         Args:
777           result_summary: the results of the test run
778           include_crashes: whether crashes are included in the output.
779             We use False when finding the list of failures to retry
780             to see if the results were flaky. Although the crashes may also be
781             flaky, we treat them as if they aren't so that they're not ignored.
782         Returns:
783           a dict of files -> results
784         """
785         failed_results = {}
786         for test, result in result_summary.unexpected_results.iteritems():
787             if (result == test_expectations.PASS or
788                 result == test_expectations.CRASH and not include_crashes):
789                 continue
790             failed_results[test] = result
791
792         return failed_results
793
794     def _upload_json_files(self, unexpected_results, result_summary,
795                         individual_test_timings):
796         """Writes the results of the test run as JSON files into the results
797         dir and uploads the files to the AppEngine server.
798
799         There are three different files written into the results dir:
800           unexpected_results.json: A short list of any unexpected results.
801             This is used by the buildbots to display results.
802           expectations.json: This is used by the flakiness dashboard.
803           results.json: A full list of the results - used by the flakiness
804             dashboard and the aggregate results dashboard.
805
806         Args:
807           unexpected_results: dict of unexpected results
808           result_summary: full summary object
809           individual_test_timings: list of test times (used by the flakiness
810             dashboard).
811         """
812         results_directory = self._options.results_directory
813         _log.debug("Writing JSON files in %s." % results_directory)
814         unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
815         with codecs.open(unexpected_json_path, "w", "utf-8") as file:
816             simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)
817
818         # Write a json file of the test_expectations.txt file for the layout
819         # tests dashboard.
820         expectations_path = os.path.join(results_directory, "expectations.json")
821         expectations_json = \
822             self._expectations.get_expectations_json_for_all_platforms()
823         with codecs.open(expectations_path, "w", "utf-8") as file:
824             file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)
825
826         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
827             self._port, self._options.builder_name, self._options.build_name,
828             self._options.build_number, self._options.results_directory,
829             BUILDER_BASE_URL, individual_test_timings,
830             self._expectations, result_summary, self._test_files_list,
831             not self._options.upload_full_results,
832             self._options.test_results_server,
833             "layout-tests",
834             self._options.master_name)
835
836         _log.debug("Finished writing JSON files.")
837
838         json_files = ["expectations.json"]
839         if self._options.upload_full_results:
840             json_files.append("results.json")
841         else:
842             json_files.append("incremental_results.json")
843
844         generator.upload_json_files(json_files)
845
846     def _print_config(self):
847         """Prints the configuration for the test run."""
848         p = self._printer
849         p.print_config("Using port '%s'" % self._port.name())
850         p.print_config("Placing test results in %s" %
851                        self._options.results_directory)
852         if self._options.new_baseline:
853             p.print_config("Placing new baselines in %s" %
854                            self._port.baseline_path())
855         p.print_config("Using %s build" % self._options.configuration)
856         if self._options.pixel_tests:
857             p.print_config("Pixel tests enabled")
858         else:
859             p.print_config("Pixel tests disabled")
860
861         p.print_config("Regular timeout: %s, slow test timeout: %s" %
862                        (self._options.time_out_ms,
863                         self._options.slow_time_out_ms))
864
865         if self._num_workers() == 1:
866             p.print_config("Running one %s" % self._port.driver_name())
867         else:
868             p.print_config("Running %s %ss in parallel" %
869                            (self._options.child_processes,
870                             self._port.driver_name()))
871         p.print_config('Command line: ' +
872                        ' '.join(self._port.driver_cmd_line()))
873         p.print_config("Worker model: %s" % self._options.worker_model)
874         p.print_config("")
875
876     def _print_expected_results_of_type(self, result_summary,
877                                         result_type, result_type_str):
878         """Print the number of tests in a given result class.
879
880         Args:
881           result_summary - the object containing all the results to report on
882           result_type - the particular result type to report in the summary.
883           result_type_str - a string description of the result_type.
884         """
885         tests = self._expectations.get_tests_with_result_type(result_type)
886         now = result_summary.tests_by_timeline[test_expectations.NOW]
887         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
888
889         # We use a fancy format string in order to print the data out in a
890         # nicely-aligned table.
891         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
892                   % (self._num_digits(now), self._num_digits(wontfix)))
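        # Example of a line this prints (numbers are illustrative):
        #
        #     Expect: 21423 passes   (21321 now, 102 wontfix)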
893         self._printer.print_expected(fmtstr %
894             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
895
896     def _num_digits(self, num):
897         """Returns the number of digits needed to represent the length of a
898         sequence."""
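        # For example, a list of 1500 tests needs 4 digits; an empty
        # sequence still gets 1 so the format string above stays valid.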
899         ndigits = 1
900         if len(num):
901             ndigits = int(math.log10(len(num))) + 1
902         return ndigits
903
904     def _print_timing_statistics(self, total_time, thread_timings,
905                                directory_test_timings, individual_test_timings,
906                                result_summary):
907         """Record timing-specific information for the test run.
908
909         Args:
910           total_time: total elapsed time (in seconds) for the test run
911           thread_timings: wall clock time each thread ran for
912           directory_test_timings: timing by directory
913           individual_test_timings: timing by file
914           result_summary: summary object for the test run
915         """
916         self._printer.print_timing("Test timing:")
917         self._printer.print_timing("  %6.2f total testing time" % total_time)
918         self._printer.print_timing("")
919         self._printer.print_timing("Thread timing:")
920         cuml_time = 0
921         for t in thread_timings:
922             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
923                   (t['name'], t['num_tests'], t['total_time']))
924             cuml_time += t['total_time']
925         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
926               (cuml_time, cuml_time / int(self._options.child_processes)))
927         self._printer.print_timing("")
928
929         self._print_aggregate_test_statistics(individual_test_timings)
930         self._print_individual_test_times(individual_test_timings,
931                                           result_summary)
932         self._print_directory_timings(directory_test_timings)
933
934     def _print_aggregate_test_statistics(self, individual_test_timings):
935         """Prints aggregate statistics (median, mean, etc.) for all tests.
936         Args:
937           individual_test_timings: List of TestResults for all tests.
938         """
939         test_types = []  # Unit tests don't actually produce any timings.
940         if individual_test_timings:
941             test_types = individual_test_timings[0].time_for_diffs.keys()
942         times_for_dump_render_tree = []
943         times_for_diff_processing = []
944         times_per_test_type = {}
945         for test_type in test_types:
946             times_per_test_type[test_type] = []
947
948         for test_stats in individual_test_timings:
949             times_for_dump_render_tree.append(test_stats.test_run_time)
950             times_for_diff_processing.append(
951                 test_stats.total_time_for_all_diffs)
952             time_for_diffs = test_stats.time_for_diffs
953             for test_type in test_types:
954                 times_per_test_type[test_type].append(
955                     time_for_diffs[test_type])
956
957         self._print_statistics_for_test_timings(
958             "PER TEST TIME IN TESTSHELL (seconds):",
959             times_for_dump_render_tree)
960         self._print_statistics_for_test_timings(
961             "PER TEST DIFF PROCESSING TIMES (seconds):",
962             times_for_diff_processing)
963         for test_type in test_types:
964             self._print_statistics_for_test_timings(
965                 "PER TEST TIMES BY TEST TYPE: %s" % test_type,
966                 times_per_test_type[test_type])
967
968     def _print_individual_test_times(self, individual_test_timings,
969                                   result_summary):
970         """Prints the run times for slow, timeout and crash tests.
971         Args:
972           individual_test_timings: List of TestStats for all tests.
973           result_summary: summary object for test run
974         """
975         # Reverse-sort by the time spent in DumpRenderTree.
976         individual_test_timings.sort(lambda a, b:
977             cmp(b.test_run_time, a.test_run_time))
978
979         num_printed = 0
980         slow_tests = []
981         timeout_or_crash_tests = []
982         unexpected_slow_tests = []
983         for test_tuple in individual_test_timings:
984             filename = test_tuple.filename
985             is_timeout_crash_or_slow = False
986             if self._test_is_slow(filename):
987                 is_timeout_crash_or_slow = True
988                 slow_tests.append(test_tuple)
989
990             if filename in result_summary.failures:
991                 result = result_summary.results[filename].type
992                 if (result == test_expectations.TIMEOUT or
993                     result == test_expectations.CRASH):
994                     is_timeout_crash_or_slow = True
995                     timeout_or_crash_tests.append(test_tuple)
996
997             if (not is_timeout_crash_or_slow and
998                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
999                 num_printed = num_printed + 1
1000                 unexpected_slow_tests.append(test_tuple)
1001
1002         self._printer.print_timing("")
1003         self._print_test_list_timing("%s slowest tests that are not "
1004             "marked as SLOW and did not timeout/crash:" %
1005             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1006         self._printer.print_timing("")
1007         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1008         self._printer.print_timing("")
1009         self._print_test_list_timing("Tests that timed out or crashed:",
1010                                      timeout_or_crash_tests)
1011         self._printer.print_timing("")
1012
1013     def _print_test_list_timing(self, title, test_list):
1014         """Print timing info for each test.
1015
1016         Args:
1017           title: section heading
1018           test_list: tests that fall in this section
1019         """
1020         if self._printer.disabled('slowest'):
1021             return
1022
1023         self._printer.print_timing(title)
1024         for test_tuple in test_list:
1025             filename = test_tuple.filename[len(
1026                 self._port.layout_tests_dir()) + 1:]
1027             filename = filename.replace('\\', '/')
1028             test_run_time = round(test_tuple.test_run_time, 1)
1029             self._printer.print_timing("  %s took %s seconds" %
1030                                        (filename, test_run_time))
1031
1032     def _print_directory_timings(self, directory_test_timings):
1033         """Print timing info by directory for any directories that
1034         take > 10 seconds to run.
1035
1036         Args:
1037           directory_test_timings: time info for each directory
1038         """
1039         timings = []
1040         for directory in directory_test_timings:
1041             num_tests, time_for_directory = directory_test_timings[directory]
1042             timings.append((round(time_for_directory, 1), directory,
1043                             num_tests))
1044         timings.sort()
1045
1046         self._printer.print_timing("Time to process slowest subdirectories:")
1047         min_seconds_to_print = 10
1048         for timing in timings:
1049             if timing[0] > min_seconds_to_print:
1050                 self._printer.print_timing(
1051                     "  %s took %s seconds to run %s tests." % (timing[1],
1052                     timing[0], timing[2]))
1053         self._printer.print_timing("")
1054
1055     def _print_statistics_for_test_timings(self, title, timings):
1056         """Prints the median, mean and standard deviation of the values in
1057         timings.
1058
1059         Args:
1060           title: Title for these timings.
1061           timings: A list of floats representing times.
1062         """
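        # Illustrative example: for timings == [1.0, 2.0, 4.0], the code
        # below reports median 2.0, mean ~2.33, 90th and 99th percentile
        # 4.0, and a standard deviation of ~1.25.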
1063         self._printer.print_timing(title)
1064         timings.sort()
1065
1066         num_tests = len(timings)
1067         if not num_tests:
1068             return
1069         percentile90 = timings[int(.9 * num_tests)]
1070         percentile99 = timings[int(.99 * num_tests)]
1071
1072         if num_tests % 2 == 1:
1073             median = timings[(num_tests - 1) / 2]
1074         else:
1075             lower = timings[num_tests / 2 - 1]
1076             upper = timings[num_tests / 2]
1077             median = (float(lower + upper)) / 2
1078
1079         mean = sum(timings) / num_tests
1080
1081         sum_of_deviations = 0
1082         for time in timings:
1083             sum_of_deviations += math.pow(time - mean, 2)
1084         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1085         self._printer.print_timing("  Median:          %6.3f" % median)
1086         self._printer.print_timing("  Mean:            %6.3f" % mean)
1087         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1088         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1089         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1090         self._printer.print_timing("")
1091
1092     def _print_result_summary(self, result_summary):
1093         """Print a short summary about how many tests passed.
1094
1095         Args:
1096           result_summary: information to log
1097         """
1098         failed = len(result_summary.failures)
1099         skipped = len(
1100             result_summary.tests_by_expectation[test_expectations.SKIP])
1101         total = result_summary.total
1102         passed = total - failed - skipped
1103         pct_passed = 0.0
1104         if total > 0:
1105             pct_passed = float(passed) * 100 / total
1106
1107         self._printer.print_actual("")
1108         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1109                      (passed, total, pct_passed))
1110         self._printer.print_actual("")
1111         self._print_result_summary_entry(result_summary,
1112             test_expectations.NOW, "Tests to be fixed")
1113
1114         self._printer.print_actual("")
1115         self._print_result_summary_entry(result_summary,
1116             test_expectations.WONTFIX,
1117             "Tests that will only be fixed if they crash (WONTFIX)")
1118         self._printer.print_actual("")
1119
1120     def _print_result_summary_entry(self, result_summary, timeline,
1121                                     heading):
1122         """Print a summary block of results for a particular timeline of test.
1123
1124         Args:
1125           result_summary: summary to print results for
1126           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1127           heading: a textual description of the timeline
1128         """
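        # Example of the block this prints (counts and failure descriptions
        # are illustrative; the real descriptions come from
        # TestExpectationsFile.EXPECTATION_DESCRIPTIONS):
        #
        #     => Tests to be fixed (120):
        #         90 text diff mismatch          ( 75.0%)
        #         30 crash                       ( 25.0%)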
1129         total = len(result_summary.tests_by_timeline[timeline])
1130         not_passing = (total -
1131            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1132                result_summary.tests_by_timeline[timeline]))
1133         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1134
1135         for result in TestExpectationsFile.EXPECTATION_ORDER:
1136             if result == test_expectations.PASS:
1137                 continue
1138             results = (result_summary.tests_by_expectation[result] &
1139                        result_summary.tests_by_timeline[timeline])
1140             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1141             if not_passing and len(results):
1142                 pct = len(results) * 100.0 / not_passing
1143                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1144                     (len(results), desc[len(results) != 1], pct))
1145
1146     def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
1147         """
1148         test_files = a list of file paths
1149         failures = dictionary mapping test paths to failure objects
1150         title = title printed at the top of the page
1151         override_time = current time (used by unit tests)
1152         """
1153         page = """<html>
1154   <head>
1155     <title>Layout Test Results (%(time)s)</title>
1156   </head>
1157   <body>
1158     <h2>%(title)s (%(time)s)</h2>
1159         """ % {'title': title, 'time': override_time or time.asctime()}
1160
1161         for test_file in sorted(test_files):
1162             test_name = self._port.relative_test_filename(test_file)
1163             test_url = self._port.filename_to_uri(test_file)
1164             page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
1165             test_failures = failures.get(test_file, [])
1166             for failure in test_failures:
1167                 page += (u"&nbsp;&nbsp;%s<br/>" %
1168                          failure.result_html_output(test_name))
1169             page += "</p>\n"
1170         page += "</body></html>\n"
1171         return page
1172
1173     def _write_results_html_file(self, result_summary):
1174         """Write results.html which is a summary of tests that failed.
1175
1176         Args:
1177           result_summary: a summary of the results :)
1178
1179         Returns:
1180           True if any results were written (since expected failures may be
1181           omitted)
1182         """
1183         # test failures
1184         if self._options.full_results_html:
1185             results_title = "Test Failures"
1186             test_files = result_summary.failures.keys()
1187         else:
1188             results_title = "Unexpected Test Failures"
1189             unexpected_failures = self._get_failures(result_summary,
1190                 include_crashes=True)
1191             test_files = unexpected_failures.keys()
1192         if not len(test_files):
1193             return False
1194
1195         out_filename = os.path.join(self._options.results_directory,
1196                                     "results.html")
1197         with codecs.open(out_filename, "w", "utf-8") as results_file:
1198             html = self._results_html(test_files, result_summary.failures, results_title)
1199             results_file.write(html)
1200
1201         return True
1202
1203     def _show_results_html_file(self):
1204         """Shows the results.html page."""
1205         results_filename = os.path.join(self._options.results_directory,
1206                                         "results.html")
1207         self._port.show_results_html_file(results_filename)
1208
1209
1210 def read_test_files(files):
1211     tests = []
1212     for file in files:
1213         try:
1214             with codecs.open(file, 'r', 'utf-8') as file_contents:
1215                 # FIXME: This could be cleaner using a list comprehension.
1216                 for line in file_contents:
1217                     line = test_expectations.strip_comments(line)
1218                     if line:
1219                         tests.append(line)
1220         except IOError, e:
1221             if e.errno == errno.ENOENT:
1222                 _log.critical('')
1223                 _log.critical('--test-list file "%s" not found' % file)
1224             raise
1225     return tests