#!/usr/bin/env python
# Copyright (C) 2010, 2012 Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""Package that handles non-debug, non-file output for run-webkit-tests."""

import math
import optparse

from webkitpy.tool import grammar
from webkitpy.common.net import resultsjsonparser
from webkitpy.layout_tests.models import test_expectations
from webkitpy.layout_tests.models.test_expectations import TestExpectations
from webkitpy.layout_tests.views.metered_stream import MeteredStream


NUM_SLOW_TESTS_TO_LOG = 10


def print_options():
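    """Return the optparse options that control how much output run-webkit-tests prints."""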
    return [
        optparse.make_option('-q', '--quiet', action='store_true', default=False,
                             help='run quietly (errors, warnings, and progress only)'),
        optparse.make_option('-v', '--verbose', action='store_true', default=False,
                             help='print a summarized result for every test (one line per test)'),
        optparse.make_option('--details', action='store_true', default=False,
                             help='print detailed results for every test'),
        optparse.make_option('--debug-rwt-logging', action='store_true', default=False,
                             help='print timestamps and debug information for run-webkit-tests itself'),
    ]


class Printer(object):
    """Class handling all non-debug-logging printing done by run-webkit-tests.

    Printing from run-webkit-tests falls into two buckets: general or
    regular output that is read only by humans and can be changed at any
    time, and output that is parsed by buildbots (and humans) and hence
    must be changed more carefully and in coordination with the buildbot
    parsing code (in chromium.org's buildbot/master.chromium/scripts/master/
    log_parser/webkit_test_command.py script).

    By default the buildbot-parsed code gets logged to stdout, and regular
    output gets logged to stderr."""
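
    # A minimal usage sketch (assumed caller wiring, not code from this module):
    # run-webkit-tests is expected to create one Printer, send regular output to
    # stderr and the buildbot-parsed output to stdout, and clean up when done:
    #
    #   printer = Printer(port, options, regular_output=sys.stderr,
    #                     buildbot_output=sys.stdout)
    #   printer.print_config()
    #   ...
    #   printer.print_results(...)
    #   printer.cleanup()
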
    def __init__(self, port, options, regular_output, buildbot_output, logger=None):
        self.num_completed = 0
        self.num_tests = 0
        self._port = port
        self._options = options
        self._buildbot_stream = buildbot_output
        self._meter = MeteredStream(regular_output, options.debug_rwt_logging, logger=logger)
        self._running_tests = []
        self._completed_tests = []

    def cleanup(self):
        self._meter.cleanup()

    def __del__(self):
        self.cleanup()

    def print_config(self):
        self._print_default("Using port '%s'" % self._port.name())
        self._print_default("Test configuration: %s" % self._port.test_configuration())
        self._print_default("Placing test results in %s" % self._options.results_directory)

        # FIXME: should these options be in printing_options?
        if self._options.new_baseline:
            self._print_default("Placing new baselines in %s" % self._port.baseline_path())

        fs = self._port.host.filesystem
        fallback_path = [fs.split(x)[1] for x in self._port.baseline_search_path()]
        self._print_default("Baseline search path: %s -> generic" % " -> ".join(fallback_path))

        self._print_default("Using %s build" % self._options.configuration)
        if self._options.pixel_tests:
            self._print_default("Pixel tests enabled")
        else:
            self._print_default("Pixel tests disabled")

        self._print_default("Regular timeout: %s, slow test timeout: %s" %
                  (self._options.time_out_ms, self._options.slow_time_out_ms))

        self._print_default('Command line: ' + ' '.join(self._port.driver_cmd_line()))
        self._print_default('')

    def print_found(self, num_all_test_files, num_to_run, repeat_each, iterations):
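        """Log how many tests were found and how many will actually run.

        num_to_run counts every repetition, so it is divided by
        repeat_each * iterations to report the number of unique tests."""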
        num_unique_tests = num_to_run / (repeat_each * iterations)
        found_str = 'Found %s; running %d' % (grammar.pluralize('test', num_all_test_files), num_unique_tests)
        if repeat_each * iterations > 1:
            found_str += ' (%d times each: --repeat-each=%d --iterations=%d)' % (repeat_each * iterations, repeat_each, iterations)
        found_str += ', skipping %d' % (num_all_test_files - num_unique_tests)
        self._print_default(found_str + '.')

    def print_expected(self, result_summary, tests_with_result_type_callback):
        self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes", tests_with_result_type_callback)
        self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures", tests_with_result_type_callback)
        self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky", tests_with_result_type_callback)
        self._print_debug('')

    def print_workers_and_shards(self, num_workers, num_shards, num_locked_shards):
        driver_name = self._port.driver_name()
        if num_workers == 1:
            self._print_default("Running 1 %s over %s." % (driver_name, grammar.pluralize('shard', num_shards)))
        else:
            self._print_default("Running %d %ss in parallel over %d shards (%d locked)." %
                (num_workers, driver_name, num_shards, num_locked_shards))
        self._print_default('')

    def _print_expected_results_of_type(self, result_summary, result_type, result_type_str, tests_with_result_type_callback):
        tests = tests_with_result_type_callback(result_type)
        now = result_summary.tests_by_timeline[test_expectations.NOW]
        wontfix = result_summary.tests_by_timeline[test_expectations.WONTFIX]

        # We use a fancy format string in order to print the data out in a
        # nicely-aligned table.
        fmtstr = ("Expect: %%5d %%-8s (%%%dd now, %%%dd wontfix)"
                  % (self._num_digits(now), self._num_digits(wontfix)))
        self._print_debug(fmtstr % (len(tests), result_type_str, len(tests & now), len(tests & wontfix)))

    def _num_digits(self, num):
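        """Return the number of decimal digits needed to print len(num)."""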
        ndigits = 1
        if len(num):
            ndigits = int(math.log10(len(num))) + 1
        return ndigits

    def print_results(self, run_time, thread_timings, test_timings, individual_test_timings, result_summary, unexpected_results):
        self._print_timing_statistics(run_time, thread_timings, test_timings, individual_test_timings, result_summary)
        self._print_result_summary(result_summary)
        self._print_one_line_summary(result_summary.total - result_summary.expected_skips,
                                     result_summary.expected - result_summary.expected_skips,
                                     result_summary.unexpected)
        self._print_unexpected_results(unexpected_results)

    def _print_timing_statistics(self, total_time, thread_timings,
                                 directory_test_timings, individual_test_timings,
                                 result_summary):
        self._print_debug("Test timing:")
        self._print_debug("  %6.2f total testing time" % total_time)
        self._print_debug("")
        self._print_debug("Thread timing:")
        cuml_time = 0
        for t in thread_timings:
            self._print_debug("    %10s: %5d tests, %6.2f secs" % (t['name'], t['num_tests'], t['total_time']))
            cuml_time += t['total_time']
        self._print_debug("   %6.2f cumulative, %6.2f optimal" % (cuml_time, cuml_time / int(self._options.child_processes)))
        self._print_debug("")

        self._print_aggregate_test_statistics(individual_test_timings)
        self._print_individual_test_times(individual_test_timings, result_summary)
        self._print_directory_timings(directory_test_timings)

    def _print_aggregate_test_statistics(self, individual_test_timings):
        times_for_dump_render_tree = [test_stats.test_run_time for test_stats in individual_test_timings]
        self._print_statistics_for_test_timings("PER TEST TIME IN TESTSHELL (seconds):", times_for_dump_render_tree)

    def _print_individual_test_times(self, individual_test_timings, result_summary):
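        """Log the slowest tests, split into tests marked SLOW, tests that
        timed out or crashed, and the slowest tests in neither bucket."""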
        # Reverse-sort by the time spent in DumpRenderTree.
        individual_test_timings.sort(lambda a, b: cmp(b.test_run_time, a.test_run_time))
        num_printed = 0
        slow_tests = []
        timeout_or_crash_tests = []
        unexpected_slow_tests = []
        for test_tuple in individual_test_timings:
            test_name = test_tuple.test_name
            is_timeout_crash_or_slow = False
            if test_name in result_summary.slow_tests:
                is_timeout_crash_or_slow = True
                slow_tests.append(test_tuple)

            if test_name in result_summary.failures:
                result = result_summary.results[test_name].type
                if (result == test_expectations.TIMEOUT or
                    result == test_expectations.CRASH):
                    is_timeout_crash_or_slow = True
                    timeout_or_crash_tests.append(test_tuple)

            if (not is_timeout_crash_or_slow and num_printed < NUM_SLOW_TESTS_TO_LOG):
                num_printed = num_printed + 1
                unexpected_slow_tests.append(test_tuple)

        self._print_debug("")
        self._print_test_list_timing("%s slowest tests that are not marked as SLOW and did not timeout/crash:" %
            NUM_SLOW_TESTS_TO_LOG, unexpected_slow_tests)
        self._print_debug("")
        self._print_test_list_timing("Tests marked as SLOW:", slow_tests)
        self._print_debug("")
        self._print_test_list_timing("Tests that timed out or crashed:", timeout_or_crash_tests)
        self._print_debug("")

    def _print_test_list_timing(self, title, test_list):
        self._print_debug(title)
        for test_tuple in test_list:
            test_run_time = round(test_tuple.test_run_time, 1)
            self._print_debug("  %s took %s seconds" % (test_tuple.test_name, test_run_time))

    def _print_directory_timings(self, directory_test_timings):
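        """Log the test directories that took longer than 10 seconds to run."""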
        timings = []
        for directory in directory_test_timings:
            num_tests, time_for_directory = directory_test_timings[directory]
            timings.append((round(time_for_directory, 1), directory, num_tests))
        timings.sort()

        self._print_debug("Time to process slowest subdirectories:")
        min_seconds_to_print = 10
        for timing in timings:
            if timing[0] > min_seconds_to_print:
                self._print_debug("  %s took %s seconds to run %s tests." % (timing[1], timing[0], timing[2]))
        self._print_debug("")

    def _print_statistics_for_test_timings(self, title, timings):
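        """Log the median, mean, 90th/99th percentiles, and standard deviation of timings."""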
        self._print_debug(title)
        timings.sort()

        num_tests = len(timings)
        if not num_tests:
            return
        percentile90 = timings[int(.9 * num_tests)]
        percentile99 = timings[int(.99 * num_tests)]

        if num_tests % 2 == 1:
            median = timings[(num_tests - 1) / 2]  # middle element of the sorted list
        else:
            lower = timings[num_tests / 2 - 1]
            upper = timings[num_tests / 2]
            median = (float(lower + upper)) / 2

        mean = sum(timings) / num_tests

        sum_of_deviations = 0
        for timing in timings:
            sum_of_deviations += math.pow(timing - mean, 2)

        std_deviation = math.sqrt(sum_of_deviations / num_tests)
        self._print_debug("  Median:          %6.3f" % median)
        self._print_debug("  Mean:            %6.3f" % mean)
        self._print_debug("  90th percentile: %6.3f" % percentile90)
        self._print_debug("  99th percentile: %6.3f" % percentile99)
        self._print_debug("  Standard dev:    %6.3f" % std_deviation)
        self._print_debug("")

    def _print_result_summary(self, result_summary):
        if not self._options.debug_rwt_logging:
            return

        failed = result_summary.total_failures
        total = result_summary.total - result_summary.expected_skips
        passed = total - failed - result_summary.remaining
        pct_passed = 0.0
        if total > 0:
            pct_passed = float(passed) * 100 / total

        self._print_for_bot("=> Results: %d/%d tests passed (%.1f%%)" % (passed, total, pct_passed))
        self._print_for_bot("")
        self._print_result_summary_entry(result_summary, test_expectations.NOW, "Tests to be fixed")

        self._print_for_bot("")
        # FIXME: We should be skipping anything marked WONTFIX, so we shouldn't bother logging these stats.
        self._print_result_summary_entry(result_summary, test_expectations.WONTFIX,
            "Tests that will only be fixed if they crash (WONTFIX)")
        self._print_for_bot("")

    def _print_result_summary_entry(self, result_summary, timeline, heading):
        total = len(result_summary.tests_by_timeline[timeline])
        not_passing = (total -
           len(result_summary.tests_by_expectation[test_expectations.PASS] &
               result_summary.tests_by_timeline[timeline]))
        self._print_for_bot("=> %s (%d):" % (heading, not_passing))

        for result in TestExpectations.EXPECTATION_ORDER:
            if result in (test_expectations.PASS, test_expectations.SKIP):
                continue
            results = (result_summary.tests_by_expectation[result] &
                       result_summary.tests_by_timeline[timeline])
            desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result]
            if not_passing and len(results):
                pct = len(results) * 100.0 / not_passing
                self._print_for_bot("  %5d %-24s (%4.1f%%)" % (len(results), desc[0], pct))

    def _print_one_line_summary(self, total, expected, unexpected):
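        """Print a one-line summary of the run ('All 25 tests ran as expected.', etc.)."""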
        incomplete = total - expected - unexpected
        incomplete_str = ''
        if incomplete:
            self._print_default("")
            incomplete_str = " (%d didn't run)" % incomplete

        if self._options.verbose or self._options.debug_rwt_logging or unexpected:
            self.writeln("")

        summary = ''
        if unexpected == 0:
            if expected == total:
                if expected > 1:
                    summary = "All %d tests ran as expected." % expected
                else:
                    summary = "The test ran as expected."
            else:
                summary = "%s ran as expected%s." % (grammar.pluralize('test', expected), incomplete_str)
        else:
            summary = "%s ran as expected, %d didn't%s:" % (grammar.pluralize('test', expected), unexpected, incomplete_str)

        self._print_quiet(summary)
        self._print_quiet("")

    def _test_status_line(self, test_name, suffix):
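        """Build a '[completed/total] test_name suffix' status line, eliding the middle
        of the test name (or falling back to the filename alone) if the line would be
        wider than the terminal."""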
        format_string = '[%d/%d] %s%s'
        status_line = format_string % (self.num_completed, self.num_tests, test_name, suffix)
        if len(status_line) > self._meter.number_of_columns():
            overflow_columns = len(status_line) - self._meter.number_of_columns()
            ellipsis = '...'
            if len(test_name) < overflow_columns + len(ellipsis) + 2:
                # We don't have enough space even if we elide, just show the test filename.
                fs = self._port.host.filesystem
                test_name = fs.split(test_name)[1]
            else:
                new_length = len(test_name) - overflow_columns - len(ellipsis)
                prefix = int(new_length / 2)
                test_name = test_name[:prefix] + ellipsis + test_name[-(new_length - prefix):]
        return format_string % (self.num_completed, self.num_tests, test_name, suffix)

    def print_started_test(self, test_name):
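        """Update the status line for a test that has just started.

        The update is throttled unless --verbose is given."""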
        self._running_tests.append(test_name)
        if len(self._running_tests) > 1:
            suffix = ' (+%d)' % (len(self._running_tests) - 1)
        else:
            suffix = ''
        if self._options.verbose:
            write = self._meter.write_update
        else:
            write = self._meter.write_throttled_update
        write(self._test_status_line(test_name, suffix))

    def print_finished_test(self, result, expected, exp_str, got_str):
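        """Record a completed test and print its status.

        How much is printed depends on --details, --verbose, and whether
        the result was expected."""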
        self.num_completed += 1
        test_name = result.test_name
        if self._options.details:
            self._print_test_trace(result, exp_str, got_str)
        elif (self._options.verbose and not self._options.debug_rwt_logging) or not expected:
            desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result.type]
            suffix = ' ' + desc[1]
            if not expected:
                suffix += ' unexpectedly' + desc[2]
            self.writeln(self._test_status_line(test_name, suffix))
        elif self.num_completed == self.num_tests:
            self._meter.write_update('')
        else:
            desc = TestExpectations.EXPECTATION_DESCRIPTIONS[result.type]
            suffix = ' ' + desc[1]
            if test_name == self._running_tests[0]:
                self._completed_tests.insert(0, [test_name, suffix])
            else:
                self._completed_tests.append([test_name, suffix])

            for test_name, suffix in self._completed_tests:
                self._meter.write_throttled_update(self._test_status_line(test_name, suffix))
            self._completed_tests = []
        self._running_tests.remove(test_name)

    def _print_test_trace(self, result, exp_str, got_str):
        test_name = result.test_name
        self._print_default(self._test_status_line(test_name, ''))

        base = self._port.lookup_virtual_test_base(test_name)
        if base:
            args = ' '.join(self._port.lookup_virtual_test_args(test_name))
            self._print_default(' base: %s' % base)
            self._print_default(' args: %s' % args)

        for extension in ('.txt', '.png', '.wav', '.webarchive'):
            self._print_baseline(test_name, extension)

        self._print_default('  exp: %s' % exp_str)
        self._print_default('  got: %s' % got_str)
        self._print_default(' took: %-.3f' % result.test_run_time)
        self._print_default('')

    def _print_baseline(self, test_name, extension):
        baseline = self._port.expected_filename(test_name, extension)
        if self._port._filesystem.exists(baseline):
            relpath = self._port.relative_test_filename(baseline)
        else:
            relpath = '<none>'
        self._print_default('  %s: %s' % (extension[1:], relpath))

    def _print_unexpected_results(self, unexpected_results):
        # Prints to the buildbot stream
        passes = {}
        flaky = {}
        regressions = {}

        def add_to_dict_of_lists(dict, key, value):
            dict.setdefault(key, []).append(value)

        def add_result(test, results, passes=passes, flaky=flaky, regressions=regressions):
            actual = results['actual'].split(" ")
            expected = results['expected'].split(" ")
            if actual == ['PASS']:
                if 'CRASH' in expected:
                    add_to_dict_of_lists(passes, 'Expected to crash, but passed', test)
                elif 'TIMEOUT' in expected:
                    add_to_dict_of_lists(passes, 'Expected to timeout, but passed', test)
                else:
                    add_to_dict_of_lists(passes, 'Expected to fail, but passed', test)
            elif len(actual) > 1:
                # We group flaky tests by the first actual result we got.
                add_to_dict_of_lists(flaky, actual[0], test)
            else:
                add_to_dict_of_lists(regressions, results['actual'], test)

        resultsjsonparser.for_each_test(unexpected_results['tests'], add_result)

        if len(passes) or len(flaky) or len(regressions):
            self._print_for_bot("")
        if len(passes):
            for key, tests in passes.iteritems():
                self._print_for_bot("%s: (%d)" % (key, len(tests)))
                tests.sort()
                for test in tests:
                    self._print_for_bot("  %s" % test)
                self._print_for_bot("")
            self._print_for_bot("")

        if len(flaky):
            descriptions = TestExpectations.EXPECTATION_DESCRIPTIONS
            for key, tests in flaky.iteritems():
                result = TestExpectations.EXPECTATIONS[key.lower()]
                self._print_for_bot("Unexpected flakiness: %s (%d)" % (descriptions[result][0], len(tests)))
                tests.sort()

                for test in tests:
                    result = resultsjsonparser.result_for_test(unexpected_results['tests'], test)
                    actual = result['actual'].split(" ")
                    expected = result['expected'].split(" ")
                    result = TestExpectations.EXPECTATIONS[key.lower()]
                    new_expectations_list = list(set(actual) | set(expected))
                    self._print_for_bot("  %s = %s" % (test, " ".join(new_expectations_list)))
                self._print_for_bot("")
            self._print_for_bot("")

        if len(regressions):
            descriptions = TestExpectations.EXPECTATION_DESCRIPTIONS
            for key, tests in regressions.iteritems():
                result = TestExpectations.EXPECTATIONS[key.lower()]
                self._print_for_bot("Regressions: Unexpected %s : (%d)" % (descriptions[result][0], len(tests)))
                tests.sort()
                for test in tests:
                    self._print_for_bot("  %s = %s" % (test, key))
                self._print_for_bot("")

        if len(unexpected_results['tests']) and self._options.debug_rwt_logging:
            self._print_for_bot("%s" % ("-" * 78))

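    # Output levels: _print_quiet always writes, _print_default is suppressed by
    # --quiet, _print_debug only writes with --debug-rwt-logging, and
    # _print_for_bot writes to the buildbot stream rather than the meter.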
    def _print_quiet(self, msg):
        self.writeln(msg)

    def _print_default(self, msg):
        if not self._options.quiet:
            self.writeln(msg)

    def _print_debug(self, msg):
        if self._options.debug_rwt_logging:
            self.writeln(msg)

    def _print_for_bot(self, msg):
        self._buildbot_stream.write(msg + "\n")

    def write_update(self, msg):
        self._meter.write_update(msg)

    def writeln(self, msg):
        self._meter.writeln(msg)

    def flush(self):
        self._meter.flush()