Tools/Scripts/webkitpy/layout_tests/layout_package/test_runner.py
1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """
32 The TestRunner class runs a series of tests (TestType interface) against a set
33 of test files.  If a test file fails a TestType, it returns a list of
34 TestFailure objects to the TestRunner, which aggregates them to
35 create a final report.
36 """
37
38 from __future__ import with_statement
39
40 import codecs
41 import errno
42 import logging
43 import math
44 import os
45 import Queue
46 import random
47 import shutil
48 import sys
49 import time
50
51 from result_summary import ResultSummary
52 from test_input import TestInput
53
54 import dump_render_tree_thread
55 import json_layout_results_generator
56 import message_broker
57 import printing
58 import test_expectations
59 import test_failures
60 import test_results
61 import test_results_uploader
62
63 from webkitpy.thirdparty import simplejson
64 from webkitpy.tool import grammar
65
66 _log = logging.getLogger("webkitpy.layout_tests.run_webkit_tests")
67
68 # Builder base URL where we have the archived test results.
69 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
70
71 LAYOUT_TESTS_DIRECTORY = "LayoutTests" + os.sep
72
73 TestExpectationsFile = test_expectations.TestExpectationsFile
74
75
76 def summarize_unexpected_results(port_obj, expectations, result_summary,
77                                  retry_summary):
78     """Summarize any unexpected results as a dict.
79
80     FIXME: split this data structure into a separate class?
81
82     Args:
83         port_obj: interface to port-specific hooks
84         expectations: test_expectations.TestExpectations object
85         result_summary: summary object from initial test runs
86         retry_summary: summary object from final test run of retried tests
87     Returns:
88         A dictionary containing a summary of the unexpected results from the
89         run, with the following fields:
90         'version': a version indicator (1 in this version)
91         'fixable': # of fixable tests (NOW - PASS)
92         'skipped': # of skipped tests (NOW & SKIPPED)
93         'num_regressions': # of non-flaky failures
94         'num_flaky': # of flaky failures
95         'num_passes': # of unexpected passes
96         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
97     """
98     results = {}
99     results['version'] = 1
100
101     tbe = result_summary.tests_by_expectation
102     tbt = result_summary.tests_by_timeline
103     results['fixable'] = len(tbt[test_expectations.NOW] -
104                                 tbe[test_expectations.PASS])
105     results['skipped'] = len(tbt[test_expectations.NOW] &
106                                 tbe[test_expectations.SKIP])
107
108     num_passes = 0
109     num_flaky = 0
110     num_regressions = 0
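    # Build a reverse map from expectation values back to their upper-cased
    # keywords (e.g. PASS, TIMEOUT) for reporting actual results as strings.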
111     keywords = {}
112     for k, v in TestExpectationsFile.EXPECTATIONS.iteritems():
113         keywords[v] = k.upper()
114
115     tests = {}
116     for filename, result in result_summary.unexpected_results.iteritems():
117         # Note that if a test crashed in the original run, we ignore
118         # whether or not it crashed when we retried it (if we retried it),
119         # and always consider the result not flaky.
120         test = port_obj.relative_test_filename(filename)
121         expected = expectations.get_expectations_string(filename)
122         actual = [keywords[result]]
123
124         if result == test_expectations.PASS:
125             num_passes += 1
126         elif result == test_expectations.CRASH:
127             num_regressions += 1
128         else:
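            # A failure is flaky if the retry produced an expected result
            # (the test is absent from retry_summary) or a different
            # unexpected result; otherwise it counts as a regression.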
129             if filename not in retry_summary.unexpected_results:
130                 actual.extend(expectations.get_expectations_string(
131                     filename).split(" "))
132                 num_flaky += 1
133             else:
134                 retry_result = retry_summary.unexpected_results[filename]
135                 if result != retry_result:
136                     actual.append(keywords[retry_result])
137                     num_flaky += 1
138                 else:
139                     num_regressions += 1
140
141         tests[test] = {}
142         tests[test]['expected'] = expected
143         tests[test]['actual'] = " ".join(actual)
144
145     results['tests'] = tests
146     results['num_passes'] = num_passes
147     results['num_flaky'] = num_flaky
148     results['num_regressions'] = num_regressions
149
150     return results
151
152
153 class TestRunInterruptedException(Exception):
154     """Raised when a test run should be stopped immediately."""
155     def __init__(self, reason):
156         self.reason = reason
157
158
159 class TestRunner:
160     """A class for managing the running of a series of tests over a set of
161     layout test files."""
162
163     HTTP_SUBDIR = os.sep.join(['', 'http', ''])
164     WEBSOCKET_SUBDIR = os.sep.join(['', 'websocket', ''])
165
166     # The per-test timeout in milliseconds, if no --time-out-ms option was
167     # given to run_webkit_tests. This should correspond to the default timeout
168     # in DumpRenderTree.
169     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
170
171     def __init__(self, port, options, printer):
172         """Initialize test runner data structures.
173
174         Args:
175           port: an object implementing port-specific functionality
176           options: a dictionary of command line options
177           printer: a Printer object to record updates to.
178         """
179         self._port = port
180         self._options = options
181         self._printer = printer
182         self._message_broker = None
183
184         # The wss server is disabled until pyOpenSSL is installed on the buildbots.
185         # self._websocket_secure_server = websocket_server.PyWebSocket(
186         #        options.results_directory, use_tls=True, port=9323)
187
188         # a set of test files, and the same tests as a list
189         self._test_files = set()
190         self._test_files_list = None
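        # Completed test results arrive on this queue as serialized
        # TestResults; update_summary() drains it as tests finish.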
191         self._result_queue = Queue.Queue()
192         self._retrying = False
193
194     def collect_tests(self, args, last_unexpected_results):
195         """Find all the files to test.
196
197         Args:
198           args: list of test arguments from the command line
199           last_unexpected_results: list of unexpected results to retest, if any
200
201         """
202         paths = [self._strip_test_dir_prefix(arg) for arg in args if arg]
203         paths += last_unexpected_results
204         if self._options.test_list:
205             paths += read_test_files(self._options.test_list)
206         self._test_files = self._port.tests(paths)
207
208     def _strip_test_dir_prefix(self, path):
209         if path.startswith(LAYOUT_TESTS_DIRECTORY):
210             return path[len(LAYOUT_TESTS_DIRECTORY):]
211         return path
212
213     def lint(self):
214         lint_failed = False
215
216         # Creating the expectations for each platform/configuration pair does
217         # all the test list parsing and ensures the syntax is correct
218         # (e.g. no duplicate entries).
219         for platform_name in self._port.test_platform_names():
220             try:
221                 self.parse_expectations(platform_name, is_debug_mode=True)
222             except test_expectations.ParseError:
223                 lint_failed = True
224             try:
225                 self.parse_expectations(platform_name, is_debug_mode=False)
226             except test_expectations.ParseError:
227                 lint_failed = True
228
229         self._printer.write("")
230         if lint_failed:
231             _log.error("Lint failed.")
232             return -1
233
234         _log.info("Lint succeeded.")
235         return 0
236
237     def parse_expectations(self, test_platform_name, is_debug_mode):
238         """Parse the expectations from the test_list files and return a data
239         structure holding them. Raises a ParseError if the test_list files
240         have invalid syntax."""
241         if self._options.lint_test_files:
242             test_files = None
243         else:
244             test_files = self._test_files
245
246         expectations_str = self._port.test_expectations()
247         overrides_str = self._port.test_expectations_overrides()
248         self._expectations = test_expectations.TestExpectations(
249             self._port, test_files, expectations_str, test_platform_name,
250             is_debug_mode, self._options.lint_test_files,
251             overrides=overrides_str)
252         return self._expectations
253
254     def prepare_lists_and_print_output(self):
255         """Create appropriate subsets of test lists and returns a
256         ResultSummary object. Also prints expected test counts.
257         """
258
259         # Remove skipped - both fixable and ignored - files from the
260         # top-level list of files to test.
261         num_all_test_files = len(self._test_files)
262         self._printer.print_expected("Found:  %d tests" %
263                                      (len(self._test_files)))
264         if not num_all_test_files:
265             _log.critical('No tests to run.')
266             return None
267
268         skipped = set()
269         if num_all_test_files > 1 and not self._options.force:
270             skipped = self._expectations.get_tests_with_result_type(
271                            test_expectations.SKIP)
272             self._test_files -= skipped
273
274         # Create a sorted list of test files so the subset chunk,
275         # if used, contains alphabetically consecutive tests.
276         self._test_files_list = list(self._test_files)
277         if self._options.randomize_order:
278             random.shuffle(self._test_files_list)
279         else:
280             self._test_files_list.sort()
281
282         # If the user specifies they just want to run a subset of the tests,
283         # just grab a subset of the non-skipped tests.
284         if self._options.run_chunk or self._options.run_part:
285             chunk_value = self._options.run_chunk or self._options.run_part
286             test_files = self._test_files_list
287             try:
288                 (chunk_num, chunk_len) = chunk_value.split(":")
289                 chunk_num = int(chunk_num)
290                 assert(chunk_num >= 0)
291                 test_size = int(chunk_len)
292                 assert(test_size > 0)
293             except:
294                 _log.critical("invalid chunk '%s'" % chunk_value)
295                 return None
296
297             # Get the number of tests
298             num_tests = len(test_files)
299
300             # Get the start offset of the slice.
301             if self._options.run_chunk:
302                 chunk_len = test_size
303                 # In this case chunk_num can be really large, so we take the
304                 # start modulo num_tests to keep the slice start in range.
305                 slice_start = (chunk_num * chunk_len) % num_tests
306             else:
307                 # Validate the data.
308                 assert(test_size <= num_tests)
309                 assert(chunk_num <= test_size)
310
311                 # To compute chunk_len without skipping any tests, we round
312                 # num_tests up to the next multiple of test_size before
313                 # dividing by the number of parts.
314                 rounded_tests = num_tests
315                 if rounded_tests % test_size != 0:
316                     rounded_tests = (num_tests + test_size -
317                                      (num_tests % test_size))
318
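                # For example, with num_tests=7 and test_size=3 (three parts),
                # rounded_tests is 9 and chunk_len is 3; part 3 gets the slice
                # [6:7] plus two tests wrapped around from the beginning.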
319                 chunk_len = rounded_tests / test_size
320                 slice_start = chunk_len * (chunk_num - 1)
321                 # It does not matter if this runs past the end; slice_end is clamped below.
322
323             # Get the end offset of the slice.
324             slice_end = min(num_tests, slice_start + chunk_len)
325
326             files = test_files[slice_start:slice_end]
327
328             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
329                 (slice_end - slice_start), slice_start, slice_end, num_tests)
330             self._printer.print_expected(tests_run_msg)
331
332             # If we reached the end and we don't have enough tests, we run some
333             # from the beginning.
334             if slice_end - slice_start < chunk_len:
335                 extra = chunk_len - (slice_end - slice_start)
336                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
337                             extra)
338                 self._printer.print_expected(extra_msg)
339                 tests_run_msg += "\n" + extra_msg
340                 files.extend(test_files[0:extra])
341             tests_run_filename = os.path.join(self._options.results_directory,
342                                               "tests_run.txt")
343             with codecs.open(tests_run_filename, "w", "utf-8") as file:
344                 file.write(tests_run_msg + "\n")
345
346             len_skip_chunk = int(len(files) * len(skipped) /
347                                  float(len(self._test_files)))
348             skip_chunk_list = list(skipped)[0:len_skip_chunk]
349             skip_chunk = set(skip_chunk_list)
350
351             # Update expectations so that the stats are calculated correctly.
352             # We need to pass a list that includes the right # of skipped files
353             # to parse_expectations() so that ResultSummary() will get the correct
354             # stats. So, we add in the subset of skipped files, and then
355             # subtract them back out.
356             self._test_files_list = files + skip_chunk_list
357             self._test_files = set(self._test_files_list)
358
359             self._expectations = self.parse_expectations(
360                 self._port.test_platform_name(),
361                 self._options.configuration == 'Debug')
362
363             self._test_files = set(files)
364             self._test_files_list = files
365         else:
366             skip_chunk = skipped
367
368         result_summary = ResultSummary(self._expectations,
369             self._test_files | skip_chunk)
370         self._print_expected_results_of_type(result_summary,
371             test_expectations.PASS, "passes")
372         self._print_expected_results_of_type(result_summary,
373             test_expectations.FAIL, "failures")
374         self._print_expected_results_of_type(result_summary,
375             test_expectations.FLAKY, "flaky")
376         self._print_expected_results_of_type(result_summary,
377             test_expectations.SKIP, "skipped")
378
379         if self._options.force:
380             self._printer.print_expected('Running all tests, including '
381                                          'skips (--force)')
382         else:
383             # Note that we don't actually run the skipped tests (they were
384             # subtracted out of self._test_files, above), but we stub out the
385             # results here so the statistics can remain accurate.
386             for test in skip_chunk:
387                 result = test_results.TestResult(test,
388                     failures=[], test_run_time=0, total_time_for_all_diffs=0,
389                     time_for_diffs=0)
390                 result.type = test_expectations.SKIP
391                 result_summary.add(result, expected=True)
392         self._printer.print_expected('')
393
394         return result_summary
395
396     def _get_dir_for_test_file(self, test_file):
397         """Returns the highest-level directory by which to shard the given
398         test file."""
399         index = test_file.rfind(os.sep + LAYOUT_TESTS_DIRECTORY)
400
401         test_file = test_file[index + len(LAYOUT_TESTS_DIRECTORY):]
402         test_file_parts = test_file.split(os.sep, 1)
403         directory = test_file_parts[0]
404         test_file = test_file_parts[1]
405
406         # The http tests are very stable on mac/linux.
407         # TODO(ojan): Make the http server on Windows use Apache so we can
408         # shard the http tests there as well. Switching to Apache is
409         # what made them stable on linux/mac.
410         return_value = directory
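        # Descend to the directory that directly contains the test so tests
        # shard at the deepest directory level; on Windows we stop descending
        # at 'http' so all http tests share one shard (see the TODO above).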
411         while ((directory != 'http' or sys.platform in ('darwin', 'linux2'))
412                 and test_file.find(os.sep) >= 0):
413             test_file_parts = test_file.split(os.sep, 1)
414             directory = test_file_parts[0]
415             return_value = os.path.join(return_value, directory)
416             test_file = test_file_parts[1]
417
418         return return_value
419
420     def _get_test_input_for_file(self, test_file):
421         """Returns the appropriate TestInput object for the file. Mostly this
422         is used for looking up the timeout value (in ms) to use for the given
423         test."""
424         if self._test_is_slow(test_file):
425             return TestInput(test_file, self._options.slow_time_out_ms)
426         return TestInput(test_file, self._options.time_out_ms)
427
428     def _test_requires_lock(self, test_file):
429         """Return True if the test needs to be locked when
430         running multiple copies of NRWTs."""
431         split_path = test_file.split(os.sep)
432         return 'http' in split_path or 'websocket' in split_path
433
434     def _test_is_slow(self, test_file):
435         return self._expectations.has_modifier(test_file,
436                                                test_expectations.SLOW)
437
438     def _shard_tests(self, test_files, use_real_shards):
439         """Groups tests into batches.
440         This helps ensure that tests that depend on each other (aka bad tests!)
441         continue to run together, as most cross-test dependencies tend to
442         occur within the same directory. If use_real_shards is False, we
443         put each (non-HTTP/websocket) test into its own shard for maximum
444         concurrency instead of trying to do any sort of real sharding.
445
446         Return:
447             A list of lists of TestInput objects.
448         """
449         # FIXME: when we added http locking, we changed how this works such
450         # that we always lump all of the HTTP threads into a single shard.
451         # That will slow down experimental-fully-parallel, but it's unclear
452         # what the best alternative is, short of completely revamping how we
453         # track when to grab the lock.
454
455         test_lists = []
456         tests_to_http_lock = []
457         if not use_real_shards:
458             for test_file in test_files:
459                 test_input = self._get_test_input_for_file(test_file)
460                 if self._test_requires_lock(test_file):
461                     tests_to_http_lock.append(test_input)
462                 else:
463                     test_lists.append((".", [test_input]))
464         else:
465             tests_by_dir = {}
466             for test_file in test_files:
467                 directory = self._get_dir_for_test_file(test_file)
468                 test_input = self._get_test_input_for_file(test_file)
469                 if self._test_requires_lock(test_file):
470                     tests_to_http_lock.append(test_input)
471                 else:
472                     tests_by_dir.setdefault(directory, [])
473                     tests_by_dir[directory].append(test_input)
474             # Sort by the number of tests in the dir so that the ones with the
475             # most tests get run first in order to maximize parallelization.
476             # Number of tests is a good enough, but not perfect, approximation
477             # of how long that set of tests will take to run. We can't just use
478             # a PriorityQueue until we move to Python 2.6.
479             for directory in tests_by_dir:
480                 test_list = tests_by_dir[directory]
481                 # Keep the tests in alphabetical order.
482                 # FIXME: Remove once tests are fixed so they can be run in any
483                 # order.
484                 test_list.reverse()
485                 test_list_tuple = (directory, test_list)
486                 test_lists.append(test_list_tuple)
487             test_lists.sort(lambda a, b: cmp(len(b[1]), len(a[1])))
488
489         # Put the http tests first. There are only a couple hundred of them,
490         # but each http test takes a very long time to run, so sorting by the
491         # number of tests doesn't accurately capture how long they take to run.
492         if tests_to_http_lock:
493             tests_to_http_lock.reverse()
494             test_lists.insert(0, ("tests_to_http_lock", tests_to_http_lock))
495
496         return test_lists
497
498     def _contains_tests(self, subdir):
499         for test_file in self._test_files:
500             if test_file.find(subdir) >= 0:
501                 return True
502         return False
503
504     def _num_workers(self):
505         return int(self._options.child_processes)
506
507     def _run_tests(self, file_list, result_summary):
508         """Runs the tests in file_list, recording results in result_summary.
509
510         Args:
511           file_list: list of test files to run
512           result_summary: summary object to populate with the results
513         Returns: a tuple (interrupted, keyboard_interrupted, thread_timings,
514             test_timings, individual_test_timings) where:
515             interrupted is whether the run was interrupted
516             keyboard_interrupted is whether the interruption was a Ctrl-C
517             thread_timings is a list of dicts giving each thread's runtime,
518               with 'name', 'num_tests' and 'total_time' properties
519             test_timings maps each sharded subdirectory to (num_tests, time)
520             individual_test_timings is a list of TestResult objects with
521               per-test run times
522         """
523
524         self._printer.print_update('Sharding tests ...')
525         num_workers = self._num_workers()
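        # Use real (per-directory) shards only with more than one worker and
        # when not running fully parallel; otherwise every test that doesn't
        # require the http lock gets its own shard.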
526         test_lists = self._shard_tests(file_list,
527             num_workers > 1 and not self._options.experimental_fully_parallel)
528         filename_queue = Queue.Queue()
529         for item in test_lists:
530             filename_queue.put(item)
531
532         self._printer.print_update('Starting %s ...' %
533                                    grammar.pluralize('worker', num_workers))
534         self._message_broker = message_broker.get(self._port, self._options)
535         broker = self._message_broker
536         self._current_filename_queue = filename_queue
537         self._current_result_summary = result_summary
538
539         if not self._options.dry_run:
540             threads = broker.start_workers(self)
541         else:
542             threads = {}
543
544         self._printer.print_update("Starting testing ...")
545         keyboard_interrupted = False
546         interrupted = False
547         if not self._options.dry_run:
548             try:
549                 broker.run_message_loop()
550             except KeyboardInterrupt:
551                 _log.info("Interrupted, exiting")
552                 broker.cancel_workers()
553                 keyboard_interrupted = True
554                 interrupted = True
555             except TestRunInterruptedException, e:
556                 _log.info(e.reason)
557                 broker.cancel_workers()
558                 interrupted = True
559             except:
560                 # Unexpected exception; don't try to clean up workers.
561                 _log.info("Exception raised, exiting")
562                 raise
563
564         thread_timings, test_timings, individual_test_timings = \
565             self._collect_timing_info(threads)
566
567         broker.cleanup()
568         self._message_broker = None
569         return (interrupted, keyboard_interrupted, thread_timings, test_timings,
570                 individual_test_timings)
571
572     def update(self):
573         self.update_summary(self._current_result_summary)
574
575     def _collect_timing_info(self, threads):
576         test_timings = {}
577         individual_test_timings = []
578         thread_timings = []
579
580         for thread in threads:
581             thread_timings.append({'name': thread.getName(),
582                                    'num_tests': thread.get_num_tests(),
583                                    'total_time': thread.get_total_time()})
584             test_timings.update(thread.get_test_group_timing_stats())
585             individual_test_timings.extend(thread.get_test_results())
586
587         return (thread_timings, test_timings, individual_test_timings)
588
589     def needs_http(self):
590         """Returns whether the test runner needs an HTTP server."""
591         return self._contains_tests(self.HTTP_SUBDIR)
592
593     def needs_websocket(self):
594         """Returns whether the test runner needs a WEBSOCKET server."""
595         return self._contains_tests(self.WEBSOCKET_SUBDIR)
596
597     def set_up_run(self):
598         """Configures the system to be ready to run tests.
599
600         Returns a ResultSummary object if we should continue to run tests,
601         or None if we should abort.
602
603         """
604         # The helper must be started before we check the system dependencies,
605         # since it may adjust system settings that those checks depend on.
606         self._printer.print_update("Starting helper ...")
607         self._port.start_helper()
608
609         # Check that the system dependencies (themes, fonts, ...) are correct.
610         if not self._options.nocheck_sys_deps:
611             self._printer.print_update("Checking system dependencies ...")
612             if not self._port.check_sys_deps(self.needs_http()):
613                 self._port.stop_helper()
614                 return None
615
616         if self._options.clobber_old_results:
617             self._clobber_old_results()
618
619         # Create the output directory if it doesn't already exist.
620         self._port.maybe_make_directory(self._options.results_directory)
621
622         self._port.setup_test_run()
623
624         self._printer.print_update("Preparing tests ...")
625         result_summary = self.prepare_lists_and_print_output()
626         if not result_summary:
627             return None
628
629         return result_summary
630
631     def run(self, result_summary):
632         """Run all our tests on all our test files.
633
634         For each test file, we run each test type. If there are any failures,
635         we collect them for reporting.
636
637         Args:
638           result_summary: a summary object tracking the test results.
639
640         Return:
641           The number of unexpected results (0 == success)
642         """
643         # collect_tests() must have been called first to initialize us.
644         # If we didn't find any files to test, we've errored out already in
645         # prepare_lists_and_print_output().
646         assert(len(self._test_files))
647
648         start_time = time.time()
649
650         interrupted, keyboard_interrupted, thread_timings, test_timings, \
651             individual_test_timings = (
652             self._run_tests(self._test_files_list, result_summary))
653
654         # We exclude the crashes from the list of results to retry, because
655         # we want to treat even a potentially flaky crash as an error.
656         failures = self._get_failures(result_summary, include_crashes=False)
657         retry_summary = result_summary
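        # Unexpected failures are retried at most once: _retrying is set
        # inside the loop, so the while condition fails after one pass.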
658         while (len(failures) and self._options.retry_failures and
659             not self._retrying and not interrupted):
660             _log.info('')
661             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
662             _log.info('')
663             self._retrying = True
664             retry_summary = ResultSummary(self._expectations, failures.keys())
665             # Note that we intentionally ignore the return value here.
666             self._run_tests(failures.keys(), retry_summary)
667             failures = self._get_failures(retry_summary, include_crashes=True)
668
669         end_time = time.time()
670
671         self._print_timing_statistics(end_time - start_time,
672                                       thread_timings, test_timings,
673                                       individual_test_timings,
674                                       result_summary)
675
676         self._print_result_summary(result_summary)
677
678         sys.stdout.flush()
679         sys.stderr.flush()
680
681         self._printer.print_one_line_summary(result_summary.total,
682                                              result_summary.expected,
683                                              result_summary.unexpected)
684
685         unexpected_results = summarize_unexpected_results(self._port,
686             self._expectations, result_summary, retry_summary)
687         self._printer.print_unexpected_results(unexpected_results)
688
689         if (self._options.record_results and not self._options.dry_run and
690             not interrupted):
691             # Write the same data to log files and upload generated JSON files
692             # to appengine server.
693             self._upload_json_files(unexpected_results, result_summary,
694                                     individual_test_timings)
695
696         # Write the summary to disk (results.html) and display it if requested.
697         if not self._options.dry_run:
698             wrote_results = self._write_results_html_file(result_summary)
699             if self._options.show_results and wrote_results:
700                 self._show_results_html_file()
701
702         # Now that we've completed all the processing we can, we re-raise
703         # a KeyboardInterrupt if necessary so the caller can handle it.
704         if keyboard_interrupted:
705             raise KeyboardInterrupt
706
707         # Ignore flaky failures and unexpected passes so we don't turn the
708         # bot red for those.
709         return unexpected_results['num_regressions']
710
711     def clean_up_run(self):
712         """Restores the system after we're done running tests."""
713
714         _log.debug("flushing stdout")
715         sys.stdout.flush()
716         _log.debug("flushing stderr")
717         sys.stderr.flush()
718         _log.debug("stopping helper")
719         self._port.stop_helper()
720
721     def update_summary(self, result_summary):
722         """Update the summary and print results with any completed tests."""
723         while True:
724             try:
725                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
726             except Queue.Empty:
727                 return
728
729             expected = self._expectations.matches_an_expected_result(
730                 result.filename, result.type, self._options.pixel_tests)
731             result_summary.add(result, expected)
732             exp_str = self._expectations.get_expectations_string(
733                 result.filename)
734             got_str = self._expectations.expectation_to_string(result.type)
735             self._printer.print_test_result(result, expected, exp_str, got_str)
736             self._printer.print_progress(result_summary, self._retrying,
737                                          self._test_files_list)
738
739             def interrupt_if_at_failure_limit(limit, count, message):
740                 if limit and count >= limit:
741                     raise TestRunInterruptedException(message % count)
742
743             interrupt_if_at_failure_limit(
744                 self._options.exit_after_n_failures,
745                 result_summary.unexpected_failures,
746                 "Aborting run since %d failures were reached")
747             interrupt_if_at_failure_limit(
748                 self._options.exit_after_n_crashes_or_timeouts,
749                 result_summary.unexpected_crashes_or_timeouts,
750                 "Aborting run since %d crashes or timeouts were reached")
751
752     def _clobber_old_results(self):
753         # Just clobber the actual test results directories since the other
754         # files in the results directory are explicitly used for cross-run
755         # tracking.
756         self._printer.print_update("Clobbering old results in %s" %
757                                    self._options.results_directory)
758         layout_tests_dir = self._port.layout_tests_dir()
759         possible_dirs = self._port.test_dirs()
760         for dirname in possible_dirs:
761             if os.path.isdir(os.path.join(layout_tests_dir, dirname)):
762                 shutil.rmtree(os.path.join(self._options.results_directory,
763                                            dirname),
764                               ignore_errors=True)
765
766     def _get_failures(self, result_summary, include_crashes):
767         """Filters a dict of results and returns only the failures.
768
769         Args:
770           result_summary: the results of the test run
771           include_crashes: whether crashes are included in the output.
772             We use False when finding the list of failures to retry
773             to see if the results were flaky. Although the crashes may also be
774             flaky, we treat them as if they aren't so that they're not ignored.
775         Returns:
776           a dict of files -> results
777         """
778         failed_results = {}
779         for test, result in result_summary.unexpected_results.iteritems():
780             if (result == test_expectations.PASS or
781                 (result == test_expectations.CRASH and not include_crashes)):
782                 continue
783             failed_results[test] = result
784
785         return failed_results
786
787     def _upload_json_files(self, unexpected_results, result_summary,
788                         individual_test_timings):
789         """Writes the results of the test run as JSON files into the results
790         dir and upload the files to the appengine server.
791
792         There are three different files written into the results dir:
793           unexpected_results.json: A short list of any unexpected results.
794             This is used by the buildbots to display results.
795           expectations.json: This is used by the flakiness dashboard.
796           results.json: A full list of the results - used by the flakiness
797             dashboard and the aggregate results dashboard.
798
799         Args:
800           unexpected_results: dict of unexpected results
801           result_summary: full summary object
802           individual_test_timings: list of test times (used by the flakiness
803             dashboard).
804         """
805         results_directory = self._options.results_directory
806         _log.debug("Writing JSON files in %s." % results_directory)
807         unexpected_json_path = os.path.join(results_directory, "unexpected_results.json")
808         with codecs.open(unexpected_json_path, "w", "utf-8") as file:
809             simplejson.dump(unexpected_results, file, sort_keys=True, indent=2)
810
811         # Write a json file of the test_expectations.txt file for the layout
812         # tests dashboard.
813         expectations_path = os.path.join(results_directory, "expectations.json")
814         expectations_json = \
815             self._expectations.get_expectations_json_for_all_platforms()
816         with codecs.open(expectations_path, "w", "utf-8") as file:
817             file.write(u"ADD_EXPECTATIONS(%s);" % expectations_json)
818
819         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
820             self._port, self._options.builder_name, self._options.build_name,
821             self._options.build_number, self._options.results_directory,
822             BUILDER_BASE_URL, individual_test_timings,
823             self._expectations, result_summary, self._test_files_list,
824             not self._options.upload_full_results,
825             self._options.test_results_server,
826             "layout-tests",
827             self._options.master_name)
828
829         _log.debug("Finished writing JSON files.")
830
831         json_files = ["expectations.json"]
832         if self._options.upload_full_results:
833             json_files.append("results.json")
834         else:
835             json_files.append("incremental_results.json")
836
837         generator.upload_json_files(json_files)
838
839     def _print_config(self):
840         """Prints the configuration for the test run."""
841         p = self._printer
842         p.print_config("Using port '%s'" % self._port.name())
843         p.print_config("Placing test results in %s" %
844                        self._options.results_directory)
845         if self._options.new_baseline:
846             p.print_config("Placing new baselines in %s" %
847                            self._port.baseline_path())
848         p.print_config("Using %s build" % self._options.configuration)
849         if self._options.pixel_tests:
850             p.print_config("Pixel tests enabled")
851         else:
852             p.print_config("Pixel tests disabled")
853
854         p.print_config("Regular timeout: %s, slow test timeout: %s" %
855                        (self._options.time_out_ms,
856                         self._options.slow_time_out_ms))
857
858         if self._num_workers() == 1:
859             p.print_config("Running one %s" % self._port.driver_name())
860         else:
861             p.print_config("Running %s %ss in parallel" %
862                            (self._options.child_processes,
863                             self._port.driver_name()))
864         p.print_config('Command line: ' +
865                        ' '.join(self._port.driver_cmd_line()))
866         p.print_config("Worker model: %s" % self._options.worker_model)
867         p.print_config("")
868
869     def _print_expected_results_of_type(self, result_summary,
870                                         result_type, result_type_str):
871         """Print the number of tests in a given result class.
872
873         Args:
874           result_summary: the object containing all the results to report on
875           result_type: the particular result type to report in the summary.
876           result_type_str: a string description of the result_type.
877         """
878         tests = self._expectations.get_tests_with_result_type(result_type)
879         now = result_summary.tests_by_timeline[test_expectations.NOW]
880         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
881
882         # We use a fancy format string in order to print the data out in a
883         # nicely-aligned table.
884         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
885                   % (self._num_digits(now), self._num_digits(wontfix)))
886         self._printer.print_expected(fmtstr %
887             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
888
889     def _num_digits(self, num):
890         """Returns the number of digits needed to represent the length of a
891         sequence."""
892         ndigits = 1
893         if len(num):
894             ndigits = int(math.log10(len(num))) + 1
895         return ndigits
896
897     def _print_timing_statistics(self, total_time, thread_timings,
898                                directory_test_timings, individual_test_timings,
899                                result_summary):
900         """Record timing-specific information for the test run.
901
902         Args:
903           total_time: total elapsed time (in seconds) for the test run
904           thread_timings: wall clock time each thread ran for
905           directory_test_timings: timing by directory
906           individual_test_timings: timing by file
907           result_summary: summary object for the test run
908         """
909         self._printer.print_timing("Test timing:")
910         self._printer.print_timing("  %6.2f total testing time" % total_time)
911         self._printer.print_timing("")
912         self._printer.print_timing("Thread timing:")
913         cuml_time = 0
914         for t in thread_timings:
915             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
916                   (t['name'], t['num_tests'], t['total_time']))
917             cuml_time += t['total_time']
918         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
919               (cuml_time, cuml_time / int(self._options.child_processes)))
920         self._printer.print_timing("")
921
922         self._print_aggregate_test_statistics(individual_test_timings)
923         self._print_individual_test_times(individual_test_timings,
924                                           result_summary)
925         self._print_directory_timings(directory_test_timings)
926
927     def _print_aggregate_test_statistics(self, individual_test_timings):
928         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
929         Args:
930           individual_test_timings: List of TestResults for all tests.
931         """
932         test_types = []  # Unit tests don't actually produce any timings.
933         if individual_test_timings:
934             test_types = individual_test_timings[0].time_for_diffs.keys()
935         times_for_dump_render_tree = []
936         times_for_diff_processing = []
937         times_per_test_type = {}
938         for test_type in test_types:
939             times_per_test_type[test_type] = []
940
941         for test_stats in individual_test_timings:
942             times_for_dump_render_tree.append(test_stats.test_run_time)
943             times_for_diff_processing.append(
944                 test_stats.total_time_for_all_diffs)
945             time_for_diffs = test_stats.time_for_diffs
946             for test_type in test_types:
947                 times_per_test_type[test_type].append(
948                     time_for_diffs[test_type])
949
950         self._print_statistics_for_test_timings(
951             "PER TEST TIME IN TESTSHELL (seconds):",
952             times_for_dump_render_tree)
953         self._print_statistics_for_test_timings(
954             "PER TEST DIFF PROCESSING TIMES (seconds):",
955             times_for_diff_processing)
956         for test_type in test_types:
957             self._print_statistics_for_test_timings(
958                 "PER TEST TIMES BY TEST TYPE: %s" % test_type,
959                 times_per_test_type[test_type])
960
961     def _print_individual_test_times(self, individual_test_timings,
962                                   result_summary):
963         """Prints the run times for slow, timeout and crash tests.
964         Args:
965           individual_test_timings: List of TestResults for all tests.
966           result_summary: summary object for test run
967         """
968         # Reverse-sort by the time spent in DumpRenderTree.
969         individual_test_timings.sort(lambda a, b:
970             cmp(b.test_run_time, a.test_run_time))
971
972         num_printed = 0
973         slow_tests = []
974         timeout_or_crash_tests = []
975         unexpected_slow_tests = []
976         for test_tuple in individual_test_timings:
977             filename = test_tuple.filename
978             is_timeout_crash_or_slow = False
979             if self._test_is_slow(filename):
980                 is_timeout_crash_or_slow = True
981                 slow_tests.append(test_tuple)
982
983             if filename in result_summary.failures:
984                 result = result_summary.results[filename].type
985                 if (result == test_expectations.TIMEOUT or
986                     result == test_expectations.CRASH):
987                     is_timeout_crash_or_slow = True
988                     timeout_or_crash_tests.append(test_tuple)
989
990             if (not is_timeout_crash_or_slow and
991                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
992                 num_printed = num_printed + 1
993                 unexpected_slow_tests.append(test_tuple)
994
995         self._printer.print_timing("")
996         self._print_test_list_timing("%s slowest tests that are not "
997             "marked as SLOW and did not timeout/crash:" %
998             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
999         self._printer.print_timing("")
1000         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1001         self._printer.print_timing("")
1002         self._print_test_list_timing("Tests that timed out or crashed:",
1003                                      timeout_or_crash_tests)
1004         self._printer.print_timing("")
1005
1006     def _print_test_list_timing(self, title, test_list):
1007         """Print timing info for each test.
1008
1009         Args:
1010           title: section heading
1011           test_list: tests that fall in this section
1012         """
1013         if self._printer.disabled('slowest'):
1014             return
1015
1016         self._printer.print_timing(title)
1017         for test_tuple in test_list:
1018             filename = test_tuple.filename[len(
1019                 self._port.layout_tests_dir()) + 1:]
1020             filename = filename.replace('\\', '/')
1021             test_run_time = round(test_tuple.test_run_time, 1)
1022             self._printer.print_timing("  %s took %s seconds" %
1023                                        (filename, test_run_time))
1024
1025     def _print_directory_timings(self, directory_test_timings):
1026         """Print timing info by directory for any directories that
1027         take > 10 seconds to run.
1028
1029         Args:
1030           directory_test_timing: time info for each directory
1031         """
1032         timings = []
1033         for directory in directory_test_timings:
1034             num_tests, time_for_directory = directory_test_timings[directory]
1035             timings.append((round(time_for_directory, 1), directory,
1036                             num_tests))
1037         timings.sort()
1038
1039         self._printer.print_timing("Time to process slowest subdirectories:")
1040         min_seconds_to_print = 10
1041         for timing in timings:
1042             if timing[0] > min_seconds_to_print:
1043                 self._printer.print_timing(
1044                     "  %s took %s seconds to run %s tests." % (timing[1],
1045                     timing[0], timing[2]))
1046         self._printer.print_timing("")
1047
1048     def _print_statistics_for_test_timings(self, title, timings):
1049         """Prints the median, mean and standard deviation of the values in
1050         timings.
1051
1052         Args:
1053           title: Title for these timings.
1054           timings: A list of floats representing times.
1055         """
1056         self._printer.print_timing(title)
1057         timings.sort()
1058
1059         num_tests = len(timings)
1060         if not num_tests:
1061             return
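        # Approximate percentiles, taken by index into the sorted timings.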
1062         percentile90 = timings[int(.9 * num_tests)]
1063         percentile99 = timings[int(.99 * num_tests)]
1064
1065         if num_tests % 2 == 1:
1066             median = timings[(num_tests - 1) / 2]
1067         else:
1068             lower = timings[num_tests / 2 - 1]
1069             upper = timings[num_tests / 2]
1070             median = (float(lower + upper)) / 2
1071
1072         mean = sum(timings) / num_tests
1073
1074         sum_of_deviations = 0
1075         for timing in timings:
1076             sum_of_deviations += math.pow(timing - mean, 2)
1077         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1078         self._printer.print_timing("  Median:          %6.3f" % median)
1079         self._printer.print_timing("  Mean:            %6.3f" % mean)
1080         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1081         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1082         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1083         self._printer.print_timing("")
1084
1085     def _print_result_summary(self, result_summary):
1086         """Print a short summary about how many tests passed.
1087
1088         Args:
1089           result_summary: information to log
1090         """
1091         failed = len(result_summary.failures)
1092         skipped = len(
1093             result_summary.tests_by_expectation[test_expectations.SKIP])
1094         total = result_summary.total
1095         passed = total - failed - skipped
1096         pct_passed = 0.0
1097         if total > 0:
1098             pct_passed = float(passed) * 100 / total
1099
1100         self._printer.print_actual("")
1101         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1102                      (passed, total, pct_passed))
1103         self._printer.print_actual("")
1104         self._print_result_summary_entry(result_summary,
1105             test_expectations.NOW, "Tests to be fixed")
1106
1107         self._printer.print_actual("")
1108         self._print_result_summary_entry(result_summary,
1109             test_expectations.WONTFIX,
1110             "Tests that will only be fixed if they crash (WONTFIX)")
1111         self._printer.print_actual("")
1112
1113     def _print_result_summary_entry(self, result_summary, timeline,
1114                                     heading):
1115         """Print a summary block of results for a particular timeline of tests.
1116
1117         Args:
1118           result_summary: summary to print results for
1119           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1120           heading: a textual description of the timeline
1121         """
1122         total = len(result_summary.tests_by_timeline[timeline])
1123         not_passing = (total -
1124            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1125                result_summary.tests_by_timeline[timeline]))
1126         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1127
1128         for result in TestExpectationsFile.EXPECTATION_ORDER:
1129             if result == test_expectations.PASS:
1130                 continue
1131             results = (result_summary.tests_by_expectation[result] &
1132                        result_summary.tests_by_timeline[timeline])
1133             desc = TestExpectationsFile.EXPECTATION_DESCRIPTIONS[result]
1134             if not_passing and len(results):
1135                 pct = len(results) * 100.0 / not_passing
1136                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1137                     (len(results), desc[len(results) != 1], pct))
1138
1139     def _results_html(self, test_files, failures, title="Test Failures", override_time=None):
1140         """
1141         test_files = a list of file paths
1142         failures = dictionary mapping test paths to failure objects
1143         title = title printed at the top of the results page
1144         override_time = current time (used by unit tests)
1145         """
1146         page = """<html>
1147   <head>
1148     <title>Layout Test Results (%(time)s)</title>
1149   </head>
1150   <body>
1151     <h2>%(title)s (%(time)s)</h2>
1152         """ % {'title': title, 'time': override_time or time.asctime()}
1153
1154         for test_file in sorted(test_files):
1155             test_name = self._port.relative_test_filename(test_file)
1156             test_url = self._port.filename_to_uri(test_file)
1157             page += u"<p><a href='%s'>%s</a><br />\n" % (test_url, test_name)
1158             test_failures = failures.get(test_file, [])
1159             for failure in test_failures:
1160                 page += (u"&nbsp;&nbsp;%s<br/>" %
1161                          failure.result_html_output(test_name))
1162             page += "</p>\n"
1163         page += "</body></html>\n"
1164         return page
1165
1166     def _write_results_html_file(self, result_summary):
1167         """Write results.html which is a summary of tests that failed.
1168
1169         Args:
1170           result_summary: a summary of the results :)
1171
1172         Returns:
1173           True if any results were written (since expected failures may be
1174           omitted)
1175         """
1176         # test failures
1177         if self._options.full_results_html:
1178             results_title = "Test Failures"
1179             test_files = result_summary.failures.keys()
1180         else:
1181             results_title = "Unexpected Test Failures"
1182             unexpected_failures = self._get_failures(result_summary,
1183                 include_crashes=True)
1184             test_files = unexpected_failures.keys()
1185         if not len(test_files):
1186             return False
1187
1188         out_filename = os.path.join(self._options.results_directory,
1189                                     "results.html")
1190         with codecs.open(out_filename, "w", "utf-8") as results_file:
1191             html = self._results_html(test_files, result_summary.failures, results_title)
1192             results_file.write(html)
1193
1194         return True
1195
1196     def _show_results_html_file(self):
1197         """Shows the results.html page."""
1198         results_filename = os.path.join(self._options.results_directory,
1199                                         "results.html")
1200         self._port.show_results_html_file(results_filename)
1201
1202
1203 def read_test_files(files):
1204     tests = []
1205     for file in files:
1206         try:
1207             with codecs.open(file, 'r', 'utf-8') as file_contents:
1208                 # FIXME: This could be cleaner using a list comprehension.
1209                 for line in file_contents:
1210                     line = test_expectations.strip_comments(line)
1211                     if line:
1212                         tests.append(line)
1213         except IOError, e:
1214             if e.errno == errno.ENOENT:
1215                 _log.critical('')
1216                 _log.critical('--test-list file "%s" not found' % file)
1217             raise
1218     return tests