1 #!/usr/bin/env python
2 # Copyright (C) 2010 Google Inc. All rights reserved.
3 # Copyright (C) 2010 Gabor Rapcsanyi (rgabor@inf.u-szeged.hu), University of Szeged
4 #
5 # Redistribution and use in source and binary forms, with or without
6 # modification, are permitted provided that the following conditions are
7 # met:
8 #
9 #     * Redistributions of source code must retain the above copyright
10 # notice, this list of conditions and the following disclaimer.
11 #     * Redistributions in binary form must reproduce the above
12 # copyright notice, this list of conditions and the following disclaimer
13 # in the documentation and/or other materials provided with the
14 # distribution.
15 #     * Neither the name of Google Inc. nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """
32 The Manager runs a series of tests (TestType interface) against a set
33 of test files.  If a test file fails a TestType, it returns a list of TestFailure
34 objects to the Manager. The Manager then aggregates the TestFailures to
35 create a final report.
36 """
37
38 from __future__ import with_statement
39
40 import errno
41 import logging
42 import math
43 import Queue
44 import random
45 import re
46 import sys
47 import time
48
49 from webkitpy.common.checkout.scm import default_scm
50 from webkitpy.layout_tests.controllers import manager_worker_broker
51 from webkitpy.layout_tests.controllers import worker
52 from webkitpy.layout_tests.layout_package import json_layout_results_generator
53 from webkitpy.layout_tests.layout_package import json_results_generator
54 from webkitpy.layout_tests.models import test_expectations
55 from webkitpy.layout_tests.models import test_failures
56 from webkitpy.layout_tests.models import test_results
57 from webkitpy.layout_tests.models.test_input import TestInput
58 from webkitpy.layout_tests.models.result_summary import ResultSummary
59 from webkitpy.layout_tests.views import printing
60
61 from webkitpy.tool import grammar
62
63 _log = logging.getLogger(__name__)
64
65 # Builder base URL where we have the archived test results.
66 BUILDER_BASE_URL = "http://build.chromium.org/buildbot/layout_test_results/"
67
68 TestExpectations = test_expectations.TestExpectations
69
70
71 # FIXME: This should be on the Manager class (since that's the only caller)
72 # or split off from Manager onto another helper class, but should not be a free function.
73 # Most likely this should be made into its own class, and this super-long function
74 # split into many helper functions.
75 def summarize_results(port_obj, expectations, result_summary, retry_summary, test_timings, only_unexpected, interrupted):
76     """Summarize failing results as a dict.
77
78     FIXME: split this data structure into a separate class?
79
80     Args:
81         port_obj: interface to port-specific hooks
82         expectations: test_expectations.TestExpectations object
83         result_summary: summary object from initial test runs
84         retry_summary: summary object from final test run of retried tests
85         test_timings: a list of TestResult objects which contain test runtimes in seconds
86         only_unexpected: whether to return a summary only for the unexpected results
        interrupted: whether the test run was interrupted before it completed
87     Returns:
88         A dictionary containing a summary of the unexpected results from the
89         run, with the following fields:
90         'version': a version indicator
91         'fixable': The number of fixable tests (NOW - PASS)
92         'skipped': The number of skipped tests (NOW & SKIPPED)
93         'num_regressions': The number of non-flaky failures
94         'num_flaky': The number of flaky failures
95         'num_missing': The number of tests with missing results
96         'num_passes': The number of unexpected passes
97         'tests': a dict of tests -> {'expected': '...', 'actual': '...'}
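        A minimal, illustrative sketch of the returned shape (test names,
        counts, and result keywords here are hypothetical):
            {
                'version': 3,
                'fixable': 2,
                'skipped': 1,
                'num_regressions': 1,
                'num_flaky': 0,
                'num_missing': 0,
                'num_passes': 1,
                'tests': {'fast': {'css': {'a.html': {'expected': 'PASS',
                                                      'actual': 'TEXT'}}}}
            }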
98     """
99     results = {}
100     results['version'] = 3
101
102     tbe = result_summary.tests_by_expectation
103     tbt = result_summary.tests_by_timeline
104     results['fixable'] = len(tbt[test_expectations.NOW] - tbe[test_expectations.PASS])
105     results['skipped'] = len(tbt[test_expectations.NOW] & tbe[test_expectations.SKIP])
106
107     num_passes = 0
108     num_flaky = 0
109     num_missing = 0
110     num_regressions = 0
111     keywords = {}
112     for expectation_string, expectation_enum in TestExpectations.EXPECTATIONS.iteritems():
113         keywords[expectation_enum] = expectation_string.upper()
114
115     for modifier_string, modifier_enum in TestExpectations.MODIFIERS.iteritems():
116         keywords[modifier_enum] = modifier_string.upper()
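    # Illustrative sketch of the mapping built above (symbolic; the exact
    # keyword set comes from TestExpectations):
    #   keywords[test_expectations.PASS]  == 'PASS'
    #   keywords[test_expectations.CRASH] == 'CRASH'
    #   keywords[test_expectations.SKIP]  == 'SKIP'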
117
118     tests = {}
119     original_results = result_summary.unexpected_results if only_unexpected else result_summary.results
120
121     for test_name, result in original_results.iteritems():
122         # Note that if a test crashed in the original run, we ignore
123         # whether or not it crashed when we retried it (if we retried it),
124         # and always consider the result not flaky.
125         expected = expectations.get_expectations_string(test_name)
126         result_type = result.type
127         actual = [keywords[result_type]]
128
129         if result_type == test_expectations.SKIP:
130             continue
131
132         test_dict = {}
133         if result.has_stderr:
134             test_dict['has_stderr'] = True
135
136         if result_type == test_expectations.PASS:
137             num_passes += 1
138             # FIXME: include passing tests that have stderr output.
139             if expected == 'PASS':
140                 continue
141         elif result_type == test_expectations.CRASH:
142             num_regressions += 1
143         elif result_type == test_expectations.MISSING:
144             if test_name in result_summary.unexpected_results:
145                 num_missing += 1
146         elif test_name in result_summary.unexpected_results:
147             if test_name not in retry_summary.unexpected_results:
148                 actual.extend(expectations.get_expectations_string(test_name).split(" "))
149                 num_flaky += 1
150             else:
151                 retry_result_type = retry_summary.unexpected_results[test_name].type
152                 if result_type != retry_result_type:
153                     actual.append(keywords[retry_result_type])
154                     num_flaky += 1
155                 else:
156                     num_regressions += 1
157
158         test_dict['expected'] = expected
159         test_dict['actual'] = " ".join(actual)
160         # FIXME: Set this correctly once https://webkit.org/b/37739 is fixed
161         # and only set it if there actually is stderr data.
162
163         failure_types = [type(f) for f in result.failures]
164         # FIXME: get rid of all this is_* values once there is a 1:1 map between
165         # TestFailure type and test_expectations.EXPECTATION.
166         if test_failures.FailureMissingAudio in failure_types:
167             test_dict['is_missing_audio'] = True
168
169         if test_failures.FailureReftestMismatch in failure_types:
170             test_dict['is_reftest'] = True
171
172         for f in result.failures:
173             if type(f) is test_failures.FailureImageHashMismatch:
174                 test_dict['image_diff_percent'] = f.diff_percent
177
178         if test_failures.FailureReftestMismatchDidNotOccur in failure_types:
179             test_dict['is_mismatch_reftest'] = True
180
181         if test_failures.FailureMissingResult in failure_types:
182             test_dict['is_missing_text'] = True
183
184         if test_failures.FailureMissingImage in failure_types or test_failures.FailureMissingImageHash in failure_types:
185             test_dict['is_missing_image'] = True
186
187         # Store test hierarchically by directory. e.g.
188         # foo/bar/baz.html: test_dict
189         # foo/bar/baz1.html: test_dict
190         #
191         # becomes
192         # foo: {
193         #     bar: {
194         #         baz.html: test_dict,
195         #         baz1.html: test_dict
196         #     }
197         # }
198         parts = test_name.split('/')
199         current_map = tests
200         for i, part in enumerate(parts):
201             if i == (len(parts) - 1):
202                 current_map[part] = test_dict
203                 break
204             if part not in current_map:
205                 current_map[part] = {}
206             current_map = current_map[part]
207
208     results['tests'] = tests
209     results['num_passes'] = num_passes
210     results['num_flaky'] = num_flaky
211     results['num_missing'] = num_missing
212     results['num_regressions'] = num_regressions
213     results['uses_expectations_file'] = port_obj.uses_test_expectations_file()
214     results['interrupted'] = interrupted  # Does results.html have enough information to compute this itself? (by checking total number of results vs. total number of tests?)
215     results['layout_tests_dir'] = port_obj.layout_tests_dir()
216     results['has_wdiff'] = port_obj.wdiff_available()
217     results['has_pretty_patch'] = port_obj.pretty_patch_available()
218     try:
219         results['revision'] = default_scm().head_svn_revision()
220     except Exception, e:
221         # FIXME: We would like to warn here, but that would cause all passing_run integration tests
222         # to fail, since they assert that we have no logging output.
223         # The revision lookup always fails when running the tests since it tries to read from
224         # "/mock-checkout" using the real file system (since there is no way to mock out detect_scm_system at current).
225         # Once we fix detect_scm_system to use the mock file system we can add this log back.
226         #_log.warn("Failed to determine svn revision for checkout (cwd: %s, webkit_base: %s), leaving 'revision' key blank in full_results.json.\n%s" % (port_obj._filesystem.getcwd(), port_obj.path_from_webkit_base(), e))
227         # Handle cases where we're running outside of version control.
228         import traceback
229         _log.debug('Failed to learn head svn revision:')
230         _log.debug(traceback.format_exc())
231         results['revision'] = ""
232
233     return results
234
235
236 class TestRunInterruptedException(Exception):
237     """Raised when a test run should be stopped immediately."""
238     def __init__(self, reason):
239         Exception.__init__(self)
240         self.reason = reason
241         self.msg = reason
242
243     def __reduce__(self):
244         return self.__class__, (self.reason,)
245
246
247 class WorkerException(Exception):
248     """Raised when we receive an unexpected/unknown exception from a worker."""
249     pass
250
251
252 class TestShard(object):
253     """A test shard is a named list of TestInputs."""
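    # Illustrative example (hypothetical test names and timeout value):
    #   TestShard('http/tests/misc',
    #             [TestInput('http/tests/misc/a.html', 6000),
    #              TestInput('http/tests/misc/b.html', 6000)])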
254
255     # FIXME: Make this class visible, used by workers as well.
256     def __init__(self, name, test_inputs):
257         self.name = name
258         self.test_inputs = test_inputs
259
260     def __repr__(self):
261         return "TestShard(name='%s', test_inputs=%s'" % (self.name, self.test_inputs)
262
263     def __eq__(self, other):
264         return self.name == other.name and self.test_inputs == other.test_inputs
265
266
267 class Manager(object):
268     """A class that manages running a series of tests on a set of layout
269     test files."""
270
271
272     # The per-test timeout in milliseconds, if no --time-out-ms option was
273     # given to run_webkit_tests. This should correspond to the default timeout
274     # in DumpRenderTree.
275     DEFAULT_TEST_TIMEOUT_MS = 6 * 1000
276
277     def __init__(self, port, options, printer):
278         """Initialize test runner data structures.
279
280         Args:
281           port: an object implementing port-specific functionality
282           options: a dictionary of command line options
283           printer: a Printer object for reporting progress and results.
284         """
285         self._port = port
286         self._fs = port.filesystem
287         self._options = options
288         self._printer = printer
289         self._message_broker = None
290         self._expectations = None
291
292         self.HTTP_SUBDIR = 'http' + port.TEST_PATH_SEPARATOR
293         self.WEBSOCKET_SUBDIR = 'websocket' + port.TEST_PATH_SEPARATOR
294         self.LAYOUT_TESTS_DIRECTORY = 'LayoutTests'
295         self._has_http_lock = False
296
297         self._remaining_locked_shards = []
298
299         # disable wss server. need to install pyOpenSSL on buildbots.
300         # self._websocket_secure_server = websocket_server.PyWebSocket(
301         #        options.results_directory, use_tls=True, port=9323)
302
303         # a set of test files, and the same tests as a list
304
305         # FIXME: Rename to test_names.
306         self._test_files = set()
307         self._test_files_list = None
308         self._result_queue = Queue.Queue()
309         self._retrying = False
310         self._results_directory = self._port.results_directory()
311
312         self._all_results = []
313         self._group_stats = {}
314         self._current_result_summary = None
315
316         # This maps worker names to the state we are tracking for each of them.
317         self._worker_states = {}
318
319     def collect_tests(self, args):
320         """Find all the files to test.
321
322         Args:
323           args: list of test arguments from the command line
324
325         """
326         paths = self._strip_test_dir_prefixes(args)
327         if self._options.test_list:
328             paths += self._strip_test_dir_prefixes(read_test_files(self._fs, self._options.test_list, self._port.TEST_PATH_SEPARATOR))
329         self._test_files = self._port.tests(paths)
330
331     def _strip_test_dir_prefixes(self, paths):
332         return [self._strip_test_dir_prefix(path) for path in paths if path]
333
334     def _strip_test_dir_prefix(self, path):
335         # Handle both "LayoutTests/foo/bar.html" and "LayoutTests\foo\bar.html" if
336         # the filesystem uses '\\' as a directory separator.
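        # For example (illustrative paths):
        #   'LayoutTests/fast/css/foo.html'  -> 'fast/css/foo.html'
        #   'LayoutTests\fast\css\foo.html'  -> 'fast\css\foo.html' (when sep is '\')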
337         if path.startswith(self.LAYOUT_TESTS_DIRECTORY + self._port.TEST_PATH_SEPARATOR):
338             return path[len(self.LAYOUT_TESTS_DIRECTORY + self._port.TEST_PATH_SEPARATOR):]
339         if path.startswith(self.LAYOUT_TESTS_DIRECTORY + self._fs.sep):
340             return path[len(self.LAYOUT_TESTS_DIRECTORY + self._fs.sep):]
341         return path
342
343     def lint(self):
344         lint_failed = False
345         for test_configuration in self._port.all_test_configurations():
346             try:
347                 self.lint_expectations(test_configuration)
348             except test_expectations.ParseError:
349                 lint_failed = True
350                 self._printer.write("")
351
352         if lint_failed:
353             _log.error("Lint failed.")
354             return -1
355
356         _log.info("Lint succeeded.")
357         return 0
358
359     def lint_expectations(self, config):
360         port = self._port
361         test_expectations.TestExpectations(
362             port,
363             None,
364             port.test_expectations(),
365             config,
366             self._options.lint_test_files,
367             port.test_expectations_overrides())
368
369     def _is_http_test(self, test):
370         return self.HTTP_SUBDIR in test or self.WEBSOCKET_SUBDIR in test
371
372     def _http_tests(self):
373         return set(test for test in self._test_files if self._is_http_test(test))
374
375     def parse_expectations(self):
376         """Parse the expectations from the test_list files and return a data
377         structure holding them. Throws an error if the test_list files have
378         invalid syntax."""
379         port = self._port
380         self._expectations = test_expectations.TestExpectations(
381             port,
382             self._test_files,
383             port.test_expectations(),
384             port.test_configuration(),
385             self._options.lint_test_files,
386             port.test_expectations_overrides())
387
388     # FIXME: This method is way too long and needs to be broken into pieces.
389     def prepare_lists_and_print_output(self):
390         """Creates appropriate subsets of the test lists and returns a
391         ResultSummary object. Also prints expected test counts.
392         """
393
394         # Remove skipped - both fixable and ignored - files from the
395         # top-level list of files to test.
396         num_all_test_files = len(self._test_files)
397         self._printer.print_expected("Found:  %d tests" %
398                                      (len(self._test_files)))
399         if not num_all_test_files:
400             _log.critical('No tests to run.')
401             return None
402
403         skipped = set()
404
405         if not self._options.http:
406             skipped = skipped.union(self._http_tests())
407
408         if num_all_test_files > 1 and not self._options.force:
409             skipped = skipped.union(self._expectations.get_tests_with_result_type(test_expectations.SKIP))
410             if self._options.skip_failing_tests:
411                 failing = self._expectations.get_tests_with_result_type(test_expectations.FAIL)
412                 self._test_files -= failing
413
414         self._test_files -= skipped
415
416         # Create a sorted list of test files so the subset chunk,
417         # if used, contains alphabetically consecutive tests.
418         self._test_files_list = list(self._test_files)
419         if self._options.randomize_order:
420             random.shuffle(self._test_files_list)
421         else:
422             self._test_files_list.sort(key=lambda test: test_key(self._port, test))
423
424         # If the user specifies they just want to run a subset of the tests,
425         # just grab a subset of the non-skipped tests.
426         if self._options.run_chunk or self._options.run_part:
427             chunk_value = self._options.run_chunk or self._options.run_part
428             test_files = self._test_files_list
429             try:
430                 (chunk_num, chunk_len) = chunk_value.split(":")
431                 chunk_num = int(chunk_num)
432                 assert(chunk_num >= 0)
433                 test_size = int(chunk_len)
434                 assert(test_size > 0)
435             except AssertionError:
436                 _log.critical("invalid chunk '%s'" % chunk_value)
437                 return None
438
439             # Get the number of tests
440             num_tests = len(test_files)
441
442             # Get the start offset of the slice.
443             if self._options.run_chunk:
444                 chunk_len = test_size
445                 # In this case chunk_num can be really large. We wrap the
446                 # slice around so that it fits within the current number of tests.
447                 slice_start = (chunk_num * chunk_len) % num_tests
448             else:
449                 # Validate the data.
450                 assert(test_size <= num_tests)
451                 assert(chunk_num <= test_size)
452
453                 # To compute chunk_len without skipping any tests, we round
454                 # num_tests up to the next multiple of test_size so that it
455                 # divides evenly into all the parts.
456                 rounded_tests = num_tests
457                 if rounded_tests % test_size != 0:
458                     rounded_tests = (num_tests + test_size -
459                                      (num_tests % test_size))
460
461                 chunk_len = rounded_tests / test_size
462                 slice_start = chunk_len * (chunk_num - 1)
463                 # It does not matter if this runs past the end of the test list.
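                # Worked example (illustrative): for a run_part value of
                # '2:3' with num_tests == 10, rounded_tests == 12,
                # chunk_len == 4 and slice_start == 4, so this part covers
                # tests [4:8].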
464
465             # Get the end offset of the slice.
466             slice_end = min(num_tests, slice_start + chunk_len)
467
468             files = test_files[slice_start:slice_end]
469
470             tests_run_msg = 'Running: %d tests (chunk slice [%d:%d] of %d)' % (
471                 (slice_end - slice_start), slice_start, slice_end, num_tests)
472             self._printer.print_expected(tests_run_msg)
473
474             # If we reached the end and we don't have enough tests, we run some
475             # from the beginning.
476             if slice_end - slice_start < chunk_len:
477                 extra = chunk_len - (slice_end - slice_start)
478                 extra_msg = ('   last chunk is partial, appending [0:%d]' %
479                             extra)
480                 self._printer.print_expected(extra_msg)
481                 tests_run_msg += "\n" + extra_msg
482                 files.extend(test_files[0:extra])
483             tests_run_filename = self._fs.join(self._results_directory, "tests_run.txt")
484             self._fs.write_text_file(tests_run_filename, tests_run_msg)
485
486             len_skip_chunk = int(len(files) * len(skipped) /
487                                  float(len(self._test_files)))
488             skip_chunk_list = list(skipped)[0:len_skip_chunk]
489             skip_chunk = set(skip_chunk_list)
490
491             # Update expectations so that the stats are calculated correctly.
492             # We need to pass a list that includes the right # of skipped files
493             # to parse_expectations() so that ResultSummary() will get the correct
494             # stats. So, we add in the subset of skipped files, and then
495             # subtract them back out.
496             self._test_files_list = files + skip_chunk_list
497             self._test_files = set(self._test_files_list)
498
499             self.parse_expectations()
500
501             self._test_files = set(files)
502             self._test_files_list = files
503         else:
504             skip_chunk = skipped
505
506         result_summary = ResultSummary(self._expectations, self._test_files | skip_chunk)
507         self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes")
508         self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures")
509         self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky")
510         self._print_expected_results_of_type(result_summary, test_expectations.SKIP, "skipped")
511
512         if self._options.force:
513             self._printer.print_expected('Running all tests, including '
514                                          'skips (--force)')
515         else:
516             # Note that we don't actually run the skipped tests (they were
517             # subtracted out of self._test_files, above), but we stub out the
518             # results here so the statistics can remain accurate.
519             for test in skip_chunk:
520                 result = test_results.TestResult(test)
521                 result.type = test_expectations.SKIP
522                 result_summary.add(result, expected=True)
523         self._printer.print_expected('')
524
525         # Check to make sure we didn't filter out all of the tests.
526         if not len(self._test_files):
527             _log.info("All tests are being skipped")
528             return None
529
530         return result_summary
531
532     def _get_dir_for_test_file(self, test_file):
533         """Returns the highest-level directory by which to shard the given
534         test file."""
535         directory, test_file = self._port.split_test(test_file)
536
537         # The http tests are very stable on mac/linux.
538         # TODO(ojan): Make the http server on Windows be apache so we can
539         # shard the http tests there as well. Switching to apache is
540         # what made them stable on linux/mac.
541         return directory
542
543     def _get_test_input_for_file(self, test_file):
544         """Returns the appropriate TestInput object for the file. Mostly this
545         is used for looking up the timeout value (in ms) to use for the given
546         test."""
547         if self._test_is_slow(test_file):
548             return TestInput(test_file, self._options.slow_time_out_ms)
549         return TestInput(test_file, self._options.time_out_ms)
550
551     def _test_requires_lock(self, test_file):
552         """Return True if the test needs to be locked when
553         running multiple copies of NRWTs."""
554         return self._is_http_test(test_file)
555
556     def _test_is_slow(self, test_file):
557         return self._expectations.has_modifier(test_file, test_expectations.SLOW)
558
559     def _shard_tests(self, test_files, num_workers, fully_parallel):
560         """Groups tests into batches.
561         This helps ensure that tests that depend on each other (aka bad tests!)
562         continue to run together, as most cross-test dependencies tend to
563         occur within the same directory.
564         Return:
565             Two lists of TestShards. The first contains tests that must only be
566             run under the server lock, the second can be run whenever.
567         """
568
569         # FIXME: Move all of the sharding logic out of manager into its
570         # own class or module. Consider grouping it with the chunking logic
571         # in prepare_lists as well.
572         if num_workers == 1:
573             return self._shard_in_two(test_files)
574         elif fully_parallel:
575             return self._shard_every_file(test_files)
576         return self._shard_by_directory(test_files, num_workers)
577
578     def _shard_in_two(self, test_files):
579         """Returns two lists of shards, one with all the tests requiring a lock and one with the rest.
580
581         This is used when there's only one worker, to minimize the per-shard overhead."""
582         locked_inputs = []
583         unlocked_inputs = []
584         for test_file in test_files:
585             test_input = self._get_test_input_for_file(test_file)
586             if self._test_requires_lock(test_file):
587                 locked_inputs.append(test_input)
588             else:
589                 unlocked_inputs.append(test_input)
590
591         locked_shards = []
592         unlocked_shards = []
593         if locked_inputs:
594             locked_shards = [TestShard('locked_tests', locked_inputs)]
595         if unlocked_inputs:
596             unlocked_shards = [TestShard('unlocked_tests', unlocked_inputs)]
597
598         return locked_shards, unlocked_shards
599
600     def _shard_every_file(self, test_files):
601         """Returns two lists of shards, each shard containing a single test file.
602
603         This mode gets maximal parallelism at the cost of much higher flakiness."""
604         locked_shards = []
605         unlocked_shards = []
606         for test_file in test_files:
607             test_input = self._get_test_input_for_file(test_file)
608
609             # Note that we use a '.' for the shard name; the name doesn't really
610             # matter, and the only other meaningful value would be the filename,
611             # which would be really redundant.
612             if self._test_requires_lock(test_file):
613                 locked_shards.append(TestShard('.', [test_input]))
614             else:
615                 unlocked_shards.append(TestShard('.', [test_input]))
616
617         return locked_shards, unlocked_shards
618
619     def _shard_by_directory(self, test_files, num_workers):
620         """Returns two lists of shards, each shard containing all the files in a directory.
621
622         This is the default mode, and gets as much parallelism as we can while
623         minimizing flakiness caused by inter-test dependencies."""
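        # Illustrative sketch (hypothetical directories): tests under
        # 'fast/css' and 'http/tests/misc' each end up in their own shard;
        # the http shard goes into locked_shards because
        # _test_requires_lock() matches the 'http/' prefix, and the locked
        # shards are then regrouped by _resize_shards() below.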
624         locked_shards = []
625         unlocked_shards = []
626         tests_by_dir = {}
627         # FIXME: Given that the tests are already sorted by directory,
628         # we can probably rewrite this to be clearer and faster.
629         for test_file in test_files:
630             directory = self._get_dir_for_test_file(test_file)
631             test_input = self._get_test_input_for_file(test_file)
632             tests_by_dir.setdefault(directory, [])
633             tests_by_dir[directory].append(test_input)
634
635         for directory, test_inputs in tests_by_dir.iteritems():
636             shard = TestShard(directory, test_inputs)
637             if self._test_requires_lock(directory):
638                 locked_shards.append(shard)
639             else:
640                 unlocked_shards.append(shard)
641
642         # Sort the shards by directory name.
643         locked_shards.sort(key=lambda shard: shard.name)
644         unlocked_shards.sort(key=lambda shard: shard.name)
645
646         return (self._resize_shards(locked_shards, self._max_locked_shards(num_workers),
647                                     'locked_shard'),
648                 unlocked_shards)
649
650     def _max_locked_shards(self, num_workers):
651         # Put a ceiling on the number of locked shards, so that we
652         # don't hammer the servers too badly.
653
654         # FIXME: For now, limit to one shard. After testing to make sure we
655         # can handle multiple shards, we should probably do something like
656         # limit this to no more than a quarter of all workers, e.g.:
657         # return max(math.ceil(num_workers / 4.0), 1)
658         return 1
659
660     def _resize_shards(self, old_shards, max_new_shards, shard_name_prefix):
661         """Takes a list of shards and redistributes the tests into no more
662         than |max_new_shards| new shards."""
663
664         # This implementation assumes that each input shard only contains tests from a
665         # single directory, and that tests in each shard must remain together; as a
666         # result, a given input shard is never split between output shards.
667         #
668         # Each output shard contains the tests from one or more input shards and
669         # hence may contain tests from multiple directories.
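        # Worked example (hypothetical counts): with 10 input shards and
        # max_new_shards == 4, num_old_per_new == 3, so this produces four
        # output shards named '<prefix>_1' .. '<prefix>_4' containing 3, 3,
        # 3 and 1 of the input shards respectively.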
670
671         def divide_and_round_up(numerator, divisor):
672             return int(math.ceil(float(numerator) / divisor))
673
674         def extract_and_flatten(shards):
675             test_inputs = []
676             for shard in shards:
677                 test_inputs.extend(shard.test_inputs)
678             return test_inputs
679
680         def split_at(seq, index):
681             return (seq[:index], seq[index:])
682
683         num_old_per_new = divide_and_round_up(len(old_shards), max_new_shards)
684         new_shards = []
685         remaining_shards = old_shards
686         while remaining_shards:
687             some_shards, remaining_shards = split_at(remaining_shards, num_old_per_new)
688             new_shards.append(TestShard('%s_%d' % (shard_name_prefix, len(new_shards) + 1),
689                                         extract_and_flatten(some_shards)))
690         return new_shards
691
692     def _log_num_workers(self, num_workers, num_shards, num_locked_shards):
693         driver_name = self._port.driver_name()
694         if num_workers == 1:
695             self._printer.print_config("Running 1 %s over %s" %
696                 (driver_name, grammar.pluralize('shard', num_shards)))
697         else:
698             self._printer.print_config("Running %d %ss in parallel over %d shards (%d locked)" %
699                 (num_workers, driver_name, num_shards, num_locked_shards))
700
701     def _run_tests(self, file_list, result_summary):
702         """Runs the tests in the file_list.
703
        Args:
            file_list: a list of the names of the tests to run
            result_summary: the summary object to populate with the results

704         Return: A tuple (interrupted, keyboard_interrupted, thread_timings,
705             test_timings, individual_test_timings)
706             interrupted is whether the run was interrupted
707             keyboard_interrupted is whether the interruption was because someone
708               typed Ctrl-C
709             thread_timings is a list of dicts with the total runtime
710               of each thread with 'name', 'num_tests', 'total_time' properties
711             test_timings is a list of timings for each sharded subdirectory
712               of the form [time, directory_name, num_tests]
713             individual_test_timings is a list of run times for each test
714               in the form {filename:filename, test_run_time:test_run_time}
716         """
717         self._current_result_summary = result_summary
718         self._all_results = []
719         self._group_stats = {}
720         self._worker_states = {}
721
722         keyboard_interrupted = False
723         interrupted = False
724         thread_timings = []
725
726         self._printer.print_update('Sharding tests ...')
727         locked_shards, unlocked_shards = self._shard_tests(file_list, int(self._options.child_processes), self._options.experimental_fully_parallel)
728
729         # FIXME: We don't have a good way to coordinate the workers so that
730         # they don't try to run the shards that need a lock if we don't actually
731         # have the lock. The easiest solution at the moment is to grab the
732         # lock at the beginning of the run, and then run all of the locked
733         # shards first. This minimizes the time spent holding the lock, but
734         # means that we won't be running tests while we're waiting for the lock.
735         # If this becomes a problem in practice we'll need to change this.
736
737         all_shards = locked_shards + unlocked_shards
738         self._remaining_locked_shards = locked_shards
739         if locked_shards:
740             self.start_servers_with_lock()
741
742         num_workers = min(int(self._options.child_processes), len(all_shards))
743         self._log_num_workers(num_workers, len(all_shards), len(locked_shards))
744
745         manager_connection = manager_worker_broker.get(self._port, self._options, self, worker.Worker)
746
747         if self._options.dry_run:
748             return (interrupted, keyboard_interrupted, thread_timings, self._group_stats, self._all_results)
749
750         self._printer.print_update('Starting %s ...' % grammar.pluralize('worker', num_workers))
751         for worker_number in xrange(num_workers):
752             worker_connection = manager_connection.start_worker(worker_number)
753             worker_state = _WorkerState(worker_number, worker_connection)
754             self._worker_states[worker_connection.name] = worker_state
755
756             # FIXME: If we start workers up too quickly, DumpRenderTree appears
757             # to thrash on something and time out its first few tests. Until
758             # we can figure out what's going on, sleep a bit in between
759             # workers. This needs a bug filed.
760             time.sleep(0.1)
761
762         self._printer.print_update("Starting testing ...")
763         for shard in all_shards:
764             # FIXME: Change 'test_list' to 'shard', make sharding public.
765             manager_connection.post_message('test_list', shard.name, shard.test_inputs)
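        # Each message posted above looks roughly like this (hypothetical
        # shard name):
        #   ('test_list', 'fast/css', [TestInput(...), TestInput(...)])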
766
767         # We post one 'stop' message for each worker. Because the stop messages
768         # are sent after all of the tests, and because each worker will stop
769         # reading messages after receiving a stop, we can be sure each
770         # worker will get a stop message and hence they will all shut down.
771         for _ in xrange(num_workers):
772             manager_connection.post_message('stop')
773
774         try:
775             while not self.is_done():
776                 manager_connection.run_message_loop(delay_secs=1.0)
777
778             # Make sure all of the workers have shut down (if possible).
779             for worker_state in self._worker_states.values():
780                 if worker_state.worker_connection.is_alive():
781                     _log.debug('Waiting for worker %d to exit' % worker_state.number)
782                     worker_state.worker_connection.join(5.0)
783                     if worker_state.worker_connection.is_alive():
784                         _log.error('Worker %d did not exit in time.' % worker_state.number)
785
786         except KeyboardInterrupt:
787             self._printer.print_update('Interrupted, exiting ...')
788             self.cancel_workers()
789             keyboard_interrupted = True
790         except TestRunInterruptedException, e:
791             _log.warning(e.reason)
792             self.cancel_workers()
793             interrupted = True
794         except WorkerException:
795             self.cancel_workers()
796             raise
797         except:
798             # Unexpected exception; don't try to clean up workers.
799             _log.error("Exception raised, exiting")
800             self.cancel_workers()
801             raise
802         finally:
803             self.stop_servers_with_lock()
804
805         thread_timings = [worker_state.stats for worker_state in self._worker_states.values()]
806
807         # FIXME: should this be a class instead of a tuple?
808         return (interrupted, keyboard_interrupted, thread_timings, self._group_stats, self._all_results)
809
810     def update(self):
811         self.update_summary(self._current_result_summary)
812
813     def _collect_timing_info(self, threads):
814         test_timings = {}
815         individual_test_timings = []
816         thread_timings = []
817
818         for thread in threads:
819             thread_timings.append({'name': thread.getName(),
820                                    'num_tests': thread.get_num_tests(),
821                                    'total_time': thread.get_total_time()})
822             test_timings.update(thread.get_test_group_timing_stats())
823             individual_test_timings.extend(thread.get_test_results())
824
825         return (thread_timings, test_timings, individual_test_timings)
826
827     def needs_servers(self):
828         return any(self._test_requires_lock(test_name) for test_name in self._test_files) and self._options.http
829
830     def set_up_run(self):
831         """Configures the system to be ready to run tests.
832
833         Returns a ResultSummary object if we should continue to run tests,
834         or None if we should abort.
835
836         """
837         # This must be started before we check the system dependencies,
838         # since the helper may do things to make the setup correct.
839         self._printer.print_update("Starting helper ...")
840         self._port.start_helper()
841
842         # Check that the system dependencies (themes, fonts, ...) are correct.
843         if not self._options.nocheck_sys_deps:
844             self._printer.print_update("Checking system dependencies ...")
845             if not self._port.check_sys_deps(self.needs_servers()):
846                 self._port.stop_helper()
847                 return None
848
849         if self._options.clobber_old_results:
850             self._clobber_old_results()
851
852         # Create the output directory if it doesn't already exist.
853         self._port.maybe_make_directory(self._results_directory)
854
855         self._port.setup_test_run()
856
857         self._printer.print_update("Preparing tests ...")
858         result_summary = self.prepare_lists_and_print_output()
859         if not result_summary:
860             return None
861
862         return result_summary
863
864     def run(self, result_summary):
865         """Run all our tests on all our test files.
866
867         For each test file, we run each test type. If there are any failures,
868         we collect them for reporting.
869
870         Args:
871           result_summary: a summary object tracking the test results.
872
873         Return:
874           The number of unexpected results (0 == success)
875         """
876         # collect_tests() must have been called first to initialize us.
877         # If we didn't find any files to test, we've errored out already in
878         # prepare_lists_and_print_output().
879         assert(len(self._test_files))
880
881         start_time = time.time()
882
883         interrupted, keyboard_interrupted, thread_timings, test_timings, individual_test_timings = self._run_tests(self._test_files_list, result_summary)
884
885         # We exclude the crashes from the list of results to retry, because
886         # we want to treat even a potentially flaky crash as an error.
887         failures = self._get_failures(result_summary, include_crashes=False, include_missing=False)
888         retry_summary = result_summary
889         while (len(failures) and self._options.retry_failures and not self._retrying and not interrupted and not keyboard_interrupted):
890             _log.info('')
891             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
892             _log.info('')
893             self._retrying = True
894             retry_summary = ResultSummary(self._expectations, failures.keys())
895             # Note that we intentionally ignore the return value here.
896             self._run_tests(failures.keys(), retry_summary)
897             failures = self._get_failures(retry_summary, include_crashes=True, include_missing=True)
898
899         end_time = time.time()
900
901         self._print_timing_statistics(end_time - start_time, thread_timings, test_timings, individual_test_timings, result_summary)
902         self._print_result_summary(result_summary)
903
904         sys.stdout.flush()
905         sys.stderr.flush()
906
907         self._printer.print_one_line_summary(result_summary.total, result_summary.expected, result_summary.unexpected)
908
909         unexpected_results = summarize_results(self._port, self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=True, interrupted=interrupted)
910         self._printer.print_unexpected_results(unexpected_results)
911
912         # Re-raise a KeyboardInterrupt if necessary so the caller can handle it.
913         if keyboard_interrupted:
914             raise KeyboardInterrupt
915
916         # FIXME: remove record_results. It's just used for testing. There's no need
917         # for it to be a commandline argument.
918         if (self._options.record_results and not self._options.dry_run and not keyboard_interrupted):
919             self._port.print_leaks_summary()
920             # Write the same data to log files and upload generated JSON files to appengine server.
921             summarized_results = summarize_results(self._port, self._expectations, result_summary, retry_summary, individual_test_timings, only_unexpected=False, interrupted=interrupted)
922             self._upload_json_files(summarized_results, result_summary, individual_test_timings)
923
924         # Write the summary to disk (results.html) and display it if requested.
925         if not self._options.dry_run:
926             self._copy_results_html_file()
927             if self._options.show_results:
928                 self._show_results_html_file(result_summary)
929
930         return self._port.exit_code_from_summarized_results(unexpected_results)
931
932     def start_servers_with_lock(self):
933         assert(self._options.http)
934         self._printer.print_update('Acquiring http lock ...')
935         self._port.acquire_http_lock()
936         self._printer.print_update('Starting HTTP server ...')
937         self._port.start_http_server()
938         self._printer.print_update('Starting WebSocket server ...')
939         self._port.start_websocket_server()
940         self._has_http_lock = True
941
942     def stop_servers_with_lock(self):
943         if self._has_http_lock:
944             self._printer.print_update('Stopping HTTP server ...')
945             self._port.stop_http_server()
946             self._printer.print_update('Stopping WebSocket server ...')
947             self._port.stop_websocket_server()
948             self._printer.print_update('Releasing server lock ...')
949             self._port.release_http_lock()
950             self._has_http_lock = False
951
952     def clean_up_run(self):
953         """Restores the system after we're done running tests."""
954
955         _log.debug("flushing stdout")
956         sys.stdout.flush()
957         _log.debug("flushing stderr")
958         sys.stderr.flush()
959         _log.debug("stopping helper")
960         self._port.stop_helper()
961
962     def update_summary(self, result_summary):
963         """Update the summary and print results with any completed tests."""
964         while True:
965             try:
966                 result = test_results.TestResult.loads(self._result_queue.get_nowait())
967             except Queue.Empty:
968                 self._printer.print_progress(result_summary, self._retrying, self._test_files_list)
969                 return
970
971             self._update_summary_with_result(result_summary, result)
972
973     def _interrupt_if_at_failure_limits(self, result_summary):
974         # Note: The messages in this method are constructed to match old-run-webkit-tests
975         # so that existing buildbot grep rules work.
976         def interrupt_if_at_failure_limit(limit, failure_count, result_summary, message):
977             if limit and failure_count >= limit:
978                 message += " %d tests run." % (result_summary.expected + result_summary.unexpected)
979                 raise TestRunInterruptedException(message)
980
981         interrupt_if_at_failure_limit(
982             self._options.exit_after_n_failures,
983             result_summary.unexpected_failures,
984             result_summary,
985             "Exiting early after %d failures." % result_summary.unexpected_failures)
986         interrupt_if_at_failure_limit(
987             self._options.exit_after_n_crashes_or_timeouts,
988             result_summary.unexpected_crashes + result_summary.unexpected_timeouts,
989             result_summary,
990             # This differs from ORWT because it does not include WebProcess crashes.
991             "Exiting early after %d crashes and %d timeouts." % (result_summary.unexpected_crashes, result_summary.unexpected_timeouts))
992
993     def _update_summary_with_result(self, result_summary, result):
994         if result.type == test_expectations.SKIP:
995             result_summary.add(result, expected=True)
996         else:
997             expected = self._expectations.matches_an_expected_result(result.test_name, result.type, self._options.pixel_tests)
998             result_summary.add(result, expected)
999             exp_str = self._expectations.get_expectations_string(result.test_name)
1000             got_str = self._expectations.expectation_to_string(result.type)
1001             self._printer.print_test_result(result, expected, exp_str, got_str)
1002         self._printer.print_progress(result_summary, self._retrying, self._test_files_list)
1003         self._interrupt_if_at_failure_limits(result_summary)
1004
1005     def _clobber_old_results(self):
1006         # Just clobber the actual test results directories since the other
1007         # files in the results directory are explicitly used for cross-run
1008         # tracking.
1009         self._printer.print_update("Clobbering old results in %s" %
1010                                    self._results_directory)
1011         layout_tests_dir = self._port.layout_tests_dir()
1012         possible_dirs = self._port.test_dirs()
1013         for dirname in possible_dirs:
1014             if self._fs.isdir(self._fs.join(layout_tests_dir, dirname)):
1015                 self._fs.rmtree(self._fs.join(self._results_directory, dirname))
1016
1017     def _get_failures(self, result_summary, include_crashes, include_missing):
1018         """Filters a dict of results and returns only the failures.
1019
1020         Args:
1021           result_summary: the results of the test run
1022           include_crashes: whether crashes are included in the output.
1023             We use False when finding the list of failures to retry
1024             to see if the results were flaky. Although the crashes may also be
1025             flaky, we treat them as if they aren't so that they're not ignored.
          include_missing: whether tests with missing results are included in
            the output.
1026         Returns:
1027           a dict of files -> results
1028         """
1029         failed_results = {}
1030         for test, result in result_summary.unexpected_results.iteritems():
1031             if (result.type == test_expectations.PASS or
1032                 (result.type == test_expectations.CRASH and not include_crashes) or
1033                 (result.type == test_expectations.MISSING and not include_missing)):
1034                 continue
1035             failed_results[test] = result.type
1036
1037         return failed_results
1038
1039     def _char_for_result(self, result):
1040         result = result.lower()
1041         if result in TestExpectations.EXPECTATIONS:
1042             result_enum_value = TestExpectations.EXPECTATIONS[result]
1043         else:
1044             result_enum_value = TestExpectations.MODIFIERS[result]
1045         return json_layout_results_generator.JSONLayoutResultsGenerator.FAILURE_TO_CHAR[result_enum_value]
1046
1047     def _upload_json_files(self, summarized_results, result_summary, individual_test_timings):
1048         """Writes the results of the test run as JSON files into the results
1049         dir and uploads the files to the appengine server.
1050 
1051         Args:
1053           summarized_results: dict of results
1054           result_summary: full summary object
1055           individual_test_timings: list of test times (used by the flakiness
1056             dashboard).
1057         """
1058         _log.debug("Writing JSON files in %s." % self._results_directory)
1059
1060         times_trie = json_results_generator.test_timings_trie(self._port, individual_test_timings)
1061         times_json_path = self._fs.join(self._results_directory, "times_ms.json")
1062         json_results_generator.write_json(self._fs, times_trie, times_json_path)
1063
1064         full_results_path = self._fs.join(self._results_directory, "full_results.json")
1065         json_results_generator.write_json(self._fs, summarized_results, full_results_path)
1066
1067         generator = json_layout_results_generator.JSONLayoutResultsGenerator(
1068             self._port, self._options.builder_name, self._options.build_name,
1069             self._options.build_number, self._results_directory,
1070             BUILDER_BASE_URL, individual_test_timings,
1071             self._expectations, result_summary, self._test_files_list,
1072             self._options.test_results_server,
1073             "layout-tests",
1074             self._options.master_name)
1075
1076         _log.debug("Finished writing JSON files.")
1077
1078         json_files = ["incremental_results.json", "full_results.json", "times_ms.json"]
1079
1080         generator.upload_json_files(json_files)
1081
1082         incremental_results_path = self._fs.join(self._results_directory, "incremental_results.json")
1083
1084         # Remove these files from the results directory so they don't take up too much space on the buildbot.
1085         # The tools use the version we uploaded to the results server anyway.
1086         self._fs.remove(times_json_path)
1087         self._fs.remove(incremental_results_path)
1088
1089     def print_config(self):
1090         """Prints the configuration for the test run."""
1091         p = self._printer
1092         p.print_config("Using port '%s'" % self._port.name())
1093         p.print_config("Test configuration: %s" % self._port.test_configuration())
1094         p.print_config("Placing test results in %s" % self._results_directory)
1095         if self._options.new_baseline:
1096             p.print_config("Placing new baselines in %s" %
1097                            self._port.baseline_path())
1098
1099         fallback_path = [self._fs.split(x)[1] for x in self._port.baseline_search_path()]
1100         p.print_config("Baseline search path: %s -> generic" % " -> ".join(fallback_path))
1101
1102         p.print_config("Using %s build" % self._options.configuration)
1103         if self._options.pixel_tests:
1104             p.print_config("Pixel tests enabled")
1105         else:
1106             p.print_config("Pixel tests disabled")
1107
1108         p.print_config("Regular timeout: %s, slow test timeout: %s" %
1109                        (self._options.time_out_ms,
1110                         self._options.slow_time_out_ms))
1111
1112         p.print_config('Command line: ' +
1113                        ' '.join(self._port.driver_cmd_line()))
1114         p.print_config("Worker model: %s" % self._options.worker_model)
1115         p.print_config("")
1116
1117     def _print_expected_results_of_type(self, result_summary,
1118                                         result_type, result_type_str):
1119         """Print the number of the tests in a given result class.
1120
1121         Args:
1122           result_summary - the object containing all the results to report on
1123           result_type - the particular result type to report in the summary.
1124           result_type_str - a string description of the result_type.
1125         """
1126         tests = self._expectations.get_tests_with_result_type(result_type)
1127         now = result_summary.tests_by_timeline[test_expectations.NOW]
1128         wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]
1129
1130         # We use a fancy format string in order to print the data out in a
1131         # nicely-aligned table.
1132         fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
1133                   % (self._num_digits(now), self._num_digits(wontfix)))
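        # For example (illustrative counts): with 90 tests in 'now' and 5 in
        # 'wontfix', fmtstr becomes "Expect: %5d %-8s (%2d now, %1d wontfix)".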
1134         self._printer.print_expected(fmtstr %
1135             (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))
1136
1137     def _num_digits(self, num):
1138         """Returns the number of digits needed to represent the length of a
1139         sequence."""
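        # For example, a sequence of length 90 needs 2 digits, one of length
        # 100 needs 3, and an empty sequence falls back to 1.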
1140         ndigits = 1
1141         if len(num):
1142             ndigits = int(math.log10(len(num))) + 1
1143         return ndigits
1144
1145     def _print_timing_statistics(self, total_time, thread_timings,
1146                                directory_test_timings, individual_test_timings,
1147                                result_summary):
1148         """Record timing-specific information for the test run.
1149
1150         Args:
1151           total_time: total elapsed time (in seconds) for the test run
1152           thread_timings: wall clock time each thread ran for
1153           directory_test_timings: timing by directory
1154           individual_test_timings: timing by file
1155           result_summary: summary object for the test run
1156         """
1157         self._printer.print_timing("Test timing:")
1158         self._printer.print_timing("  %6.2f total testing time" % total_time)
1159         self._printer.print_timing("")
1160         self._printer.print_timing("Thread timing:")
1161         cuml_time = 0
1162         for t in thread_timings:
1163             self._printer.print_timing("    %10s: %5d tests, %6.2f secs" %
1164                   (t['name'], t['num_tests'], t['total_time']))
1165             cuml_time += t['total_time']
1166         self._printer.print_timing("   %6.2f cumulative, %6.2f optimal" %
1167               (cuml_time, cuml_time / int(self._options.child_processes)))
1168         self._printer.print_timing("")
1169
1170         self._print_aggregate_test_statistics(individual_test_timings)
1171         self._print_individual_test_times(individual_test_timings,
1172                                           result_summary)
1173         self._print_directory_timings(directory_test_timings)
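    # Shapes of the inputs used above, inferred from this file (the callers
    # live elsewhere in the manager):
    #   thread_timings:          list of dicts such as
    #                            {'name': 'worker/0', 'num_tests': 12, 'total_time': 3.4}
    #                            (see _WorkerState.stats at the bottom of this file)
    #   directory_test_timings:  dict mapping directory -> (num_tests, seconds)
    #   individual_test_timings: list of per-test result objects exposing
    #                            .test_name and .test_run_time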
1174
1175     def _print_aggregate_test_statistics(self, individual_test_timings):
1176         """Prints aggregate statistics (e.g. median, mean, etc.) for all tests.
1177         Args:
1178           individual_test_timings: List of TestResults for all tests.
1179         """
1180         times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
1181         self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):",
1182                                                 times_for_dump_render_tree)
1183
1184     def _print_individual_test_times(self, individual_test_timings,
1185                                   result_summary):
1186         """Prints the run times for slow, timeout and crash tests.
1187         Args:
1188           individual_test_timings: List of TestStats for all tests.
1189           result_summary: summary object for test run
1190         """
1191         # Reverse-sort by the time spent in DumpRenderTree.
1192         individual_test_timings.sort(key=lambda t: t.test_run_time, reverse=True)
1194
1195         num_printed = 0
1196         slow_tests = []
1197         timeout_or_crash_tests = []
1198         unexpected_slow_tests = []
1199         for test_tuple in individual_test_timings:
1200             test_name = test_tuple.test_name
1201             is_timeout_crash_or_slow = False
1202             if self._test_is_slow(test_name):
1203                 is_timeout_crash_or_slow = True
1204                 slow_tests.append(test_tuple)
1205
1206             if test_name in result_summary.failures:
1207                 result = result_summary.results[test_name].type
1208                 if (result == test_expectations.TIMEOUT or
1209                     result == test_expectations.CRASH):
1210                     is_timeout_crash_or_slow = True
1211                     timeout_or_crash_tests.append(test_tuple)
1212
1213             if (not is_timeout_crash_or_slow and
1214                 num_printed < printing.NUM_SLOW_TESTS_TO_LOG):
1215                 num_printed = num_printed + 1
1216                 unexpected_slow_tests.append(test_tuple)
1217
1218         self._printer.print_timing("")
1219         self._print_test_list_timing("%s slowest tests that are not "
1220             "marked as SLOW and did not time out or crash:" %
1221             printing.NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
1222         self._printer.print_timing("")
1223         self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
1224         self._printer.print_timing("")
1225         self._print_test_list_timing("Tests that timed out or crashed:",
1226                                      timeout_or_crash_tests)
1227         self._printer.print_timing("")
1228
1229     def _print_test_list_timing(self, title, test_list):
1230         """Print timing info for each test.
1231
1232         Args:
1233           title: section heading
1234           test_list: tests that fall in this section
1235         """
1236         if self._printer.disabled('slowest'):
1237             return
1238
1239         self._printer.print_timing(title)
1240         for test_tuple in test_list:
1241             test_run_time = round(test_tuple.test_run_time, 1)
1242             self._printer.print_timing("  %s took %s seconds" % (test_tuple.test_name, test_run_time))
1243
1244     def _print_directory_timings(self, directory_test_timings):
1245         """Print timing info by directory for any directories that
1246         take > 10 seconds to run.
1247
1248         Args:
1249           directory_test_timings: time info for each directory
1250         """
1251         timings = []
1252         for directory in directory_test_timings:
1253             num_tests, time_for_directory = directory_test_timings[directory]
1254             timings.append((round(time_for_directory, 1), directory,
1255                             num_tests))
1256         timings.sort()
1257
1258         self._printer.print_timing("Time to process slowest subdirectories:")
1259         min_seconds_to_print = 10
1260         for timing in timings:
1261             if timing[0] > min_seconds_to_print:
1262                 self._printer.print_timing(
1263                     "  %s took %s seconds to run %s tests." % (timing[1],
1264                     timing[0], timing[2]))
1265         self._printer.print_timing("")
1266
1267     def _print_statistics_for_test_timings(self, title, timings):
1268         """Prints the median, mean, percentiles and standard deviation of
1269         the values in timings.
1270
1271         Args:
1272           title: Title for these timings.
1273           timings: A list of floats representing times.
1274         """
1275         self._printer.print_timing(title)
1276         timings.sort()
1277
1278         num_tests = len(timings)
1279         if not num_tests:
1280             return
1281         percentile90 = timings[int(.9 * num_tests)]
1282         percentile99 = timings[int(.99 * num_tests)]
1283
1284         if num_tests % 2 == 1:
1285             median = timings[(num_tests - 1) / 2]
1286         else:
1287             lower = timings[num_tests / 2 - 1]
1288             upper = timings[num_tests / 2]
1289             median = (float(lower + upper)) / 2
1290
1291         mean = sum(timings) / num_tests
1292
1293         sum_of_deviations = 0
1294         for timing in timings:
1295             sum_of_deviations += math.pow(timing - mean, 2)
1296         std_deviation = math.sqrt(sum_of_deviations / num_tests)
1297         self._printer.print_timing("  Median:          %6.3f" % median)
1298         self._printer.print_timing("  Mean:            %6.3f" % mean)
1299         self._printer.print_timing("  90th percentile: %6.3f" % percentile90)
1300         self._printer.print_timing("  99th percentile: %6.3f" % percentile99)
1301         self._printer.print_timing("  Standard dev:    %6.3f" % std_deviation)
1302         self._printer.print_timing("")
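    # Worked example (added for clarity): for timings = [1.0, 2.0, 3.0, 4.0]
    # the code above reports median = (2.0 + 3.0) / 2 = 2.5, mean = 2.5 and
    # standard deviation = sqrt((2.25 + 0.25 + 0.25 + 2.25) / 4) ~= 1.118.
    # With so few samples both percentile indices collapse to timings[3],
    # since int(.9 * 4) == int(.99 * 4) == 3; the indices are only an
    # approximation of the true percentiles.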
1303
1304     def _print_result_summary(self, result_summary):
1305         """Print a short summary about how many tests passed.
1306
1307         Args:
1308           result_summary: information to log
1309         """
1310         failed = len(result_summary.failures)
1311         skipped = len(
1312             result_summary.tests_by_expectation[test_expectations.SKIP])
1313         total = result_summary.total
1314         passed = total - failed - skipped
1315         pct_passed = 0.0
1316         if total > 0:
1317             pct_passed = float(passed) * 100 / total
1318
1319         self._printer.print_actual("")
1320         self._printer.print_actual("=> Results: %d/%d tests passed (%.1f%%)" %
1321                      (passed, total, pct_passed))
1322         self._printer.print_actual("")
1323         self._print_result_summary_entry(result_summary,
1324             test_expectations.NOW, "Tests to be fixed")
1325
1326         self._printer.print_actual("")
1327         self._print_result_summary_entry(result_summary,
1328             test_expectations.WONTFIX,
1329             "Tests that will only be fixed if they crash (WONTFIX)")
1330         self._printer.print_actual("")
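    # Example of the arithmetic above (illustrative numbers only): with
    # total = 20000, failed = 300 and skipped = 700, passed = 19000 and the
    # summary line reads "=> Results: 19000/20000 tests passed (95.0%)";
    # note that skipped tests remain in the denominator.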
1331
1332     def _print_result_summary_entry(self, result_summary, timeline,
1333                                     heading):
1334         """Print a summary block of results for a particular timeline of test.
1335
1336         Args:
1337           result_summary: summary to print results for
1338           timeline: the timeline to print results for (NOW, WONTFIX, etc.)
1339           heading: a textual description of the timeline
1340         """
1341         total = len(result_summary.tests_by_timeline[timeline])
1342         not_passing = (total -
1343            len(result_summary.tests_by_expectation[test_expectations.PASS] &
1344                result_summary.tests_by_timeline[timeline]))
1345         self._printer.print_actual("=> %s (%d):" % (heading, not_passing))
1346
1347         for result in TestExpectations.EXPECTATION_ORDER:
1348             if result == test_expectations.PASS:
1349                 continue
1350             results = (result_summary.tests_by_expectation[result] &
1351                        result_summary.tests_by_timeline[timeline])
1352             desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result]
1353             if not_passing and len(results):
1354                 pct = len(results) * 100.0 / not_passing
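                # desc appears to be a (singular, plural) pair; the boolean
                # index "len(results) != 1" below picks the plural form
                # whenever the count is not exactly 1.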
1355                 self._printer.print_actual("  %5d %-24s (%4.1f%%)" %
1356                     (len(results), desc[len(results) != 1], pct))
1357
1358     def _copy_results_html_file(self):
1359         base_dir = self._port.path_from_webkit_base('LayoutTests', 'fast', 'harness')
1360         results_file = self._fs.join(base_dir, 'results.html')
1361         # FIXME: What should we do if this doesn't exist (e.g., in unit tests)?
1362         if self._fs.exists(results_file):
1363             self._fs.copyfile(results_file, self._fs.join(self._results_directory, "results.html"))
1364
1365     def _show_results_html_file(self, result_summary):
1366         """Shows the results.html page."""
1367         if self._options.full_results_html:
1368             test_files = result_summary.failures.keys()
1369         else:
1370             unexpected_failures = self._get_failures(result_summary, include_crashes=True, include_missing=True)
1371             test_files = unexpected_failures.keys()
1372
1373         if not test_files:
1374             return
1375
1376         results_filename = self._fs.join(self._results_directory, "results.html")
1377         self._port.show_results_html_file(results_filename)
1378
1379     def name(self):
1380         return 'Manager'
1381
1382     def is_done(self):
1383         worker_states = self._worker_states.values()
1384         return worker_states and all(self._worker_is_done(worker_state) for worker_state in worker_states)
1385
1386     # FIXME: Inline this function.
1387     def _worker_is_done(self, worker_state):
1388         return worker_state.done
1389
1390     def cancel_workers(self):
1391         for worker_state in self._worker_states.values():
1392             worker_state.worker_connection.cancel()
1393
1394     def handle_started_test(self, source, test_info, hang_timeout):
1395         worker_state = self._worker_states[source]
1396         worker_state.current_test_name = test_info.test_name
1397         worker_state.next_timeout = time.time() + hang_timeout
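    # next_timeout is consumed by the manager's polling loop elsewhere in this
    # file (not shown in this section). Conceptually the hang check is
    # something like the following hypothetical sketch:
    #
    #     for worker_state in self._worker_states.values():
    #         if worker_state.next_timeout and time.time() > worker_state.next_timeout:
    #             # the test named by worker_state.current_test_name is hung
    #             ...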
1398
1399     def handle_done(self, source):
1400         worker_state = self._worker_states[source]
1401         worker_state.done = True
1402
1403     def handle_exception(self, source, exception_type, exception_value, stack):
1404         if exception_type in (KeyboardInterrupt, TestRunInterruptedException):
1405             raise exception_type(exception_value)
1406         _log.error("%s raised %s('%s'):" % (
1407                    source,
1408                    exception_value.__class__.__name__,
1409                    str(exception_value)))
1410         self._log_worker_stack(stack)
1411         raise WorkerException(str(exception_value))
1412
1413     def handle_finished_list(self, source, list_name, num_tests, elapsed_time):
1414         self._group_stats[list_name] = (num_tests, elapsed_time)
1415
1416         def find(name, test_lists):
1417             for i, test_list in enumerate(test_lists):
1418                 if test_list.name == name:
1419                     return i
1420             return -1
1421
1422         index = find(list_name, self._remaining_locked_shards)
1423         if index >= 0:
1424             self._remaining_locked_shards.pop(index)
1425             if not self._remaining_locked_shards:
1426                 self.stop_servers_with_lock()
1427
1428     def handle_finished_test(self, source, result, elapsed_time):
1429         worker_state = self._worker_states[source]
1430         worker_state.next_timeout = None
1431         worker_state.current_test_name = None
1432         worker_state.stats['total_time'] += elapsed_time
1433         worker_state.stats['num_tests'] += 1
1434
1435         self._all_results.append(result)
1436         self._update_summary_with_result(self._current_result_summary, result)
1437
1438     def _log_worker_stack(self, stack):
1439         webkitpydir = self._port.path_from_webkit_base('Tools', 'Scripts', 'webkitpy') + self._port.filesystem.sep
1440         for filename, line_number, function_name, text in stack:
1441             if filename.startswith(webkitpydir):
1442                 filename = filename.replace(webkitpydir, '')
1443             _log.error('  %s:%u (in %s)' % (filename, line_number, function_name))
1444             _log.error('    %s' % text)
1445
1446
1447 def read_test_files(fs, filenames, test_path_separator):
1448     tests = []
1449     for filename in filenames:
1450         try:
1451             if test_path_separator != fs.sep:
1452                 filename = filename.replace(test_path_separator, fs.sep)
1453             file_contents = fs.read_text_file(filename).split('\n')
1454             for line in file_contents:
1455                 line = test_expectations.strip_comments(line)
1456                 if line:
1457                     tests.append(line)
1458         except IOError, e:
1459             if e.errno == errno.ENOENT:
1460                 _log.critical('')
1461                 _log.critical('--test-list file "%s" not found' % filename)
1462             raise
1463     return tests
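# Illustrative --test-list file for read_test_files() (hypothetical file name
# and contents; comment stripping is delegated to
# test_expectations.strip_comments):
#
#     // smoke tests
#     fast/css/absolute-child.html
#     fast/dom
#
# read_test_files(fs, ['smoke_tests.txt'], '/') would return the two
# non-comment, non-empty lines above.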
1464
1465
1466 # FIXME: These two free functions belong either on manager (since it's the only one
1467 # which uses them) or in a different file (if they need to be re-used).
1468 def test_key(port, test_name):
1469     """Turns a test name into a pair of natural keys, one for the dirname
1470     and one for the basename.
1471
1472     This can be used when sorting paths so that files in a directory are
1473     kept together rather than being mixed in with files in its
1474     subdirectories."""
1475     dirname, basename = port.split_test(test_name)
1476     return (natural_sort_key(dirname + port.TEST_PATH_SEPARATOR), natural_sort_key(basename))
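# Hypothetical usage sketch (not part of the original file): sorting test
# names so that a directory's own files stay grouped together:
#
#     tests.sort(key=lambda name: test_key(port, name))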
1477
1478
1479 def natural_sort_key(string_to_split):
1480     """ Turn a string into a list of string and number chunks.
1481         "z23a" -> ["z", 23, "a"]
1482
1483         Can be used to implement "natural sort" order. See:
1484             http://www.codinghorror.com/blog/2007/12/sorting-for-humans-natural-sort-order.html
1485             http://nedbatchelder.com/blog/200712.html#e20071211T054956
1486     """
1487     def tryint(val):
1488         try:
1489             return int(val)
1490         except ValueError:
1491             return val
1492
1493     return [tryint(chunk) for chunk in re.split(r'(\d+)', string_to_split)]
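# For example (added comment):
#     sorted(['test10.html', 'test9.html', 'test1.html'], key=natural_sort_key)
# yields ['test1.html', 'test9.html', 'test10.html'], whereas a plain
# lexicographic sort would put 'test10.html' before 'test9.html'.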
1494
1495
1496 class _WorkerState(object):
1497     """A class for the manager to use to track the current state of the workers."""
1498     def __init__(self, number, worker_connection):
1499         self.worker_connection = worker_connection
1500         self.number = number
1501         self.done = False
1502         self.current_test_name = None
1503         self.next_timeout = None
1504         self.stats = {}
1505         self.stats['name'] = worker_connection.name
1506         self.stats['num_tests'] = 0
1507         self.stats['total_time'] = 0
1508
1509     def __repr__(self):
1510         return "_WorkerState(" + str(self.__dict__) + ")"