nrwt: clean up handling of 'expected' stats
author dpranke@chromium.org <dpranke@chromium.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 31 Jul 2012 00:01:52 +0000 (00:01 +0000)
committer dpranke@chromium.org <dpranke@chromium.org@268f45cc-cd09-0410-ab3c-d52691b4dbfc>
Tue, 31 Jul 2012 00:01:52 +0000 (00:01 +0000)
https://bugs.webkit.org/show_bug.cgi?id=92527

Reviewed by Tony Chang.

This patch alters the way we compute and log the "expected"
results and how we treat skipped tests; we will now log the
number of skipped tests separately from the categories, e.g.:

Found 31607 tests; running 24464.
Expect: 23496 passes   (23496 now,    0 wontfix)
Expect:   548 failures (  543 now,    5 wontfix)
Expect:   420 flaky    (  245 now,  175 wontfix)

(so that the "expect" totals add up to the "running" totals);
in addition, the totals in the one-line-progress reflect the
number of tests we will actually run. If --iterations or
--repeat-each is specified, the number of tests we run is
multiplied as appropriate, but the "expect" numbers are
unchanged, since we don't count multiple invocations of the same
test multiple times. In addition, if we are using --run-part or
--run-chunk, the tests we don't run are treated as skipped
for consistency. We will also log the values for --iterations
and --repeat-each as part of the found/running line.
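
To make the arithmetic concrete, here is a small, self-contained
illustration using the numbers from the example output above (the
variable names are mine, not the patch's):

    # Numbers taken from the example output above.
    found, running = 31607, 24464
    expected_passes, expected_failures, expected_flaky = 23496, 548, 420

    # The "Expect:" lines count each test once and add up to "running"...
    assert expected_passes + expected_failures + expected_flaky == running

    # ...while the one-line progress counts every invocation, so
    # --repeat-each and --iterations multiply it (both now default to 1).
    repeat_each, iterations = 2, 3
    total_invocations = running * repeat_each * iterations  # 146784 invocations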

Previously the code had parsed and re-parsed the
TestExpectations files several times in an attempt to come up
with some sane statistics, but this was expensive and led to
confusing layering; treating tests as skipped in the way described
above is more consistent and cleaner.
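
A minimal, self-contained sketch of the new approach (hypothetical test
names; the real change is the manager.py hunk below): tests dropped by
--run-part or --run-chunk are simply folded into the skipped set and
handed to the expectations and the ResultSummary, rather than
re-parsing the expectations.

    # Tests left after the expectations-driven skips were removed.
    test_files = set(['a.html', 'b.html', 'c.html'])
    skipped = set(['d.html'])                 # from TestExpectations / Skipped files
    tests_to_run = set(['a.html', 'b.html'])  # what --run-chunk left us with

    more_tests_to_skip = test_files - tests_to_run
    all_tests_to_skip = skipped | more_tests_to_skip
    assert all_tests_to_skip == set(['c.html', 'd.html'])

    # As in the patch, roughly:
    #   expectations.add_skipped_tests(more_tests_to_skip)   # marks them SKIP + WONTFIX
    #   ResultSummary(expectations, tests_to_run, iterations, all_tests_to_skip)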

* Scripts/webkitpy/layout_tests/controllers/manager.py:
(Manager._split_into_chunks_if_necessary):
(Manager.prepare_lists_and_print_output):
(Manager.run):
* Scripts/webkitpy/layout_tests/controllers/manager_unittest.py:
(ManagerTest.test_interrupt_if_at_failure_limits):
(ManagerTest.test_update_summary_with_result):
(ManagerTest.test_look_for_new_crash_logs):
(ResultSummaryTest.get_result_summary):
* Scripts/webkitpy/layout_tests/models/result_summary.py:
(ResultSummary.__init__):
* Scripts/webkitpy/layout_tests/models/test_expectations.py:
(TestExpectationParser.expectation_for_skipped_test):
(TestExpectations.__init__):
(TestExpectations.add_skipped_tests):
  Here we make add_skipped_tests() public, so that we can update
  the expectations for tests that we are skipping due to
  --run-part or --run-chunk; we use the wontfix flag so that
  the tests that are intentionally skipped aren't considered
  "fixable".
* Scripts/webkitpy/layout_tests/models/test_expectations_unittest.py:
(SkippedTests.check):
* Scripts/webkitpy/layout_tests/run_webkit_tests.py:
(parse_args):
* Scripts/webkitpy/layout_tests/views/printing.py:
(Printer.print_found):
(Printer):
(Printer.print_expected):
(Printer._print_result_summary):
(Printer._print_result_summary_entry):
  Here we split out printing the number of tests found and run
  from the expected results, to be clearer and so that we don't
  have to reparse the expectations to update the stats.
* Scripts/webkitpy/layout_tests/views/printing_unittest.py:
(Testprinter.get_result_summary):
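
Two of the pieces noted in the file comments above, reduced to small
standalone sketches (simplified, with made-up test names; the
authoritative code is in the printing.py and test_expectations.py hunks
below):

    # (a) The new found/running line, as a plain function.
    def format_found_line(num_all, num_to_run, repeat_each, iterations):
        line = 'Found %d tests; running %d' % (num_all, num_to_run)
        if repeat_each * iterations > 1:
            line += ', %d times each (--repeat-each=%d, --iterations=%d)' % (
                repeat_each * iterations, repeat_each, iterations)
        return line + '.'

    print(format_found_line(31607, 24464, 1, 1))  # Found 31607 tests; running 24464.

    # (b) Why skipped tests now also carry WONTFIX: the end-of-run summary
    # reports NOW-timeline tests as "Tests to be fixed", so tests skipped on
    # purpose (Skipped files, --run-part/--run-chunk) should fall in the
    # WONTFIX timeline instead.
    tests_by_timeline = {
        'NOW': set(['failures/expected/text.html']),
        'WONTFIX': set(['chunked-out.html', 'skipped-file.html']),
    }
    assert 'chunked-out.html' not in tests_by_timeline['NOW']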

git-svn-id: https://svn.webkit.org/repository/webkit/trunk@124116 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Tools/ChangeLog
Tools/Scripts/webkitpy/layout_tests/controllers/manager.py
Tools/Scripts/webkitpy/layout_tests/controllers/manager_unittest.py
Tools/Scripts/webkitpy/layout_tests/models/result_summary.py
Tools/Scripts/webkitpy/layout_tests/models/test_expectations.py
Tools/Scripts/webkitpy/layout_tests/models/test_expectations_unittest.py
Tools/Scripts/webkitpy/layout_tests/run_webkit_tests.py
Tools/Scripts/webkitpy/layout_tests/views/printing.py
Tools/Scripts/webkitpy/layout_tests/views/printing_unittest.py

index 9f919b0..25edd2d 100644 (file)
@@ -1,3 +1,72 @@
+2012-07-30  Dirk Pranke  <dpranke@chromium.org>
+
+        nrwt: clean up handling of 'expected' stats
+        https://bugs.webkit.org/show_bug.cgi?id=92527
+
+        Reviewed by Tony Chang.
+
+        This patch alters the way we compute and log the "expected"
+        results and how we treat skipped tests; we will now log the
+        number of skipped tests separately from the categories, e.g.:
+
+        Found 31607 tests; running 24464.
+        Expect: 23496 passes   (23496 now,    0 wontfix)
+        Expect:   548 failures (  543 now,    5 wontfix)
+        Expect:   420 flaky    (  245 now,  175 wontfix)
+
+        (so that the "expect" totals add up to the "running" totals);
+        in addition, the totals in the one-line-progress reflect the
+        number of tests we will actually run. If --iterations or
+        --repeat-each is specified, the number of tests we run is
+        multiplied as appropriate, but the "expect" numbers are
+        unchanged, since we don't count multiple invocations of the same
+        test multiple times. In addition, if we are using --run-part or
+        --run-chunk, the tests we don't run are treated as skipped
+        for consistency. We will also log the values for --iterations
+        and --repeat-each as part of the found/running line.
+
+        Previously the code had parsed and re-parsed the
+        TestExpectations files several times in an attempt to come up
+        with some sane statistics, but this was expensive and led to
+        confusing layering; treating tests as skipped in the way described
+        above is more consistent and cleaner.
+
+        * Scripts/webkitpy/layout_tests/controllers/manager.py:
+        (Manager._split_into_chunks_if_necessary):
+        (Manager.prepare_lists_and_print_output):
+        (Manager.run):
+        * Scripts/webkitpy/layout_tests/controllers/manager_unittest.py:
+        (ManagerTest.test_interrupt_if_at_failure_limits):
+        (ManagerTest.test_update_summary_with_result):
+        (ManagerTest.test_look_for_new_crash_logs):
+        (ResultSummaryTest.get_result_summary):
+        * Scripts/webkitpy/layout_tests/models/result_summary.py:
+        (ResultSummary.__init__):
+        * Scripts/webkitpy/layout_tests/models/test_expectations.py:
+        (TestExpectationParser.expectation_for_skipped_test):
+        (TestExpectations.__init__):
+        (TestExpectations.add_skipped_tests):
+          Here we make add_skipped_tests() public, so that we can update
+          the expectations for tests that we are skipping due to
+          --run-part or --run-chunk; we use the wontfix flag so that
+          the tests that are intentionally skipped aren't considered
+          "fixable".
+        * Scripts/webkitpy/layout_tests/models/test_expectations_unittest.py:
+        (SkippedTests.check):
+        * Scripts/webkitpy/layout_tests/run_webkit_tests.py:
+        (parse_args):
+        * Scripts/webkitpy/layout_tests/views/printing.py:
+        (Printer.print_found):
+        (Printer):
+        (Printer.print_expected):
+        (Printer._print_result_summary):
+        (Printer._print_result_summary_entry):
+          Here we split out printing the number of tests found and run
+          from the expected results, to be clearer and so that we don't
+          have to reparse the expectations to update the stats.
+        * Scripts/webkitpy/layout_tests/views/printing_unittest.py:
+        (Testprinter.get_result_summary):
+
 2012-07-30  Sadrul Habib Chowdhury  <sadrul@chromium.org>
 
         Propagate gesture events to plugins.
index 0544918..2cc048a 100644 (file)
@@ -426,25 +426,12 @@ class Manager(object):
             _log.debug('   last chunk is partial, appending [0:%d]' % extra)
             files.extend(test_files[0:extra])
 
-        len_skip_chunk = int(len(files) * len(skipped) / float(len(self._test_files)))
-        skip_chunk_list = list(skipped)[0:len_skip_chunk]
-        skip_chunk = set(skip_chunk_list)
-
-        # FIXME: This is a total hack.
-        # Update expectations so that the stats are calculated correctly.
-        # We need to pass a list that includes the right # of skipped files
-        # to ParseExpectations so that ResultSummary() will get the correct
-        # stats. So, we add in the subset of skipped files, and then
-        # subtract them back out.
-        self._test_files_list = files + skip_chunk_list
-        self._test_files = set(self._test_files_list)
-
-        self._parse_expectations()
-
-        self._test_files = set(files)
+        tests_to_run = set(files)
+        more_tests_to_skip = self._test_files - tests_to_run
+        self._expectations.add_skipped_tests(more_tests_to_skip)
+        self._test_files = tests_to_run
         self._test_files_list = files
-
-        return skip_chunk
+        return skipped.union(more_tests_to_skip)
 
     # FIXME: This method is way too long and needs to be broken into pieces.
     def prepare_lists_and_print_output(self):
@@ -490,9 +477,8 @@ class Manager(object):
         else:
             self._test_files_list.sort(key=lambda test: test_key(self._port, test))
 
-        skipped = self._split_into_chunks_if_necessary(skipped)
+        all_tests_to_skip = self._split_into_chunks_if_necessary(skipped)
 
-        # FIXME: It's unclear how --repeat-each and --iterations should interact with chunks?
         if self._options.repeat_each:
             list_with_repetitions = []
             for test in self._test_files_list:
@@ -502,22 +488,11 @@ class Manager(object):
         if self._options.iterations:
             self._test_files_list = self._test_files_list * self._options.iterations
 
-        iterations =  \
-            (self._options.repeat_each if self._options.repeat_each else 1) * \
-            (self._options.iterations if self._options.iterations else 1)
-        result_summary = ResultSummary(self._expectations, self._test_files | skipped, iterations)
-
-        self._printer.print_expected(num_all_test_files, result_summary, self._expectations.get_tests_with_result_type)
+        iterations = self._options.repeat_each * self._options.iterations
+        result_summary = ResultSummary(self._expectations, self._test_files, iterations, all_tests_to_skip)
 
-        if self._options.skipped != 'ignore':
-            # Note that we don't actually run the skipped tests (they were
-            # subtracted out of self._test_files, above), but we stub out the
-            # results here so the statistics can remain accurate.
-            for test in skipped:
-                result = test_results.TestResult(test)
-                result.type = test_expectations.SKIP
-                for iteration in range(iterations):
-                    result_summary.add(result, expected=True, test_is_slow=self._test_is_slow(test))
+        self._printer.print_found(num_all_test_files, len(self._test_files), self._options.repeat_each, self._options.iterations)
+        self._printer.print_expected(result_summary, self._expectations.get_tests_with_result_type)
 
         return result_summary
 
@@ -869,7 +844,7 @@ class Manager(object):
             _log.info("Retrying %d unexpected failure(s) ..." % len(failures))
             _log.info('')
             self._retrying = True
-            retry_summary = ResultSummary(self._expectations, failures.keys())
+            retry_summary = ResultSummary(self._expectations, failures.keys(), 1, set())
             # Note that we intentionally ignore the return value here.
             self._run_tests(failures.keys(), retry_summary, num_workers=1)
             failures = self._get_failures(retry_summary, include_crashes=True, include_missing=True)
index 576d423..d13b1b5 100644 (file)
@@ -45,7 +45,6 @@ from webkitpy import layout_tests
 from webkitpy.layout_tests import run_webkit_tests
 from webkitpy.layout_tests.controllers import manager
 from webkitpy.layout_tests.controllers.manager import interpret_test_failures,  Manager, natural_sort_key, test_key, TestRunInterruptedException, TestShard
-from webkitpy.layout_tests.models import result_summary
 from webkitpy.layout_tests.models import test_expectations
 from webkitpy.layout_tests.models import test_failures
 from webkitpy.layout_tests.models import test_results
@@ -288,7 +287,7 @@ class ManagerTest(unittest.TestCase):
         manager._test_files = ['foo/bar.html', 'baz.html']
         manager._test_is_slow = lambda test_name: False
 
-        result_summary = ResultSummary(expectations=Mock(), test_files=manager._test_files)
+        result_summary = ResultSummary(Mock(), manager._test_files, 1, set())
         result_summary.unexpected_failures = 100
         result_summary.unexpected_crashes = 50
         result_summary.unexpected_timeouts = 50
@@ -320,7 +319,7 @@ class ManagerTest(unittest.TestCase):
         # Reftests expected to be image mismatch should be respected when pixel_tests=False.
         manager = Manager(port=port, options=MockOptions(pixel_tests=False, exit_after_n_failures=None, exit_after_n_crashes_or_timeouts=None), printer=Mock())
         manager._expectations = expectations
-        result_summary = ResultSummary(expectations=expectations, test_files=[test])
+        result_summary = ResultSummary(expectations, [test], 1, set())
         result = TestResult(test_name=test, failures=[test_failures.FailureReftestMismatchDidNotOccur()])
         manager._update_summary_with_result(result_summary, result)
         self.assertEquals(1, result_summary.expected)
@@ -373,7 +372,7 @@ class ManagerTest(unittest.TestCase):
         port = host.port_factory.get('test-mac-leopard')
         tests = ['failures/expected/crash.html']
         expectations = test_expectations.TestExpectations(port, tests)
-        rs = result_summary.ResultSummary(expectations, tests)
+        rs = ResultSummary(expectations, tests, 1, set())
         manager = get_manager_with_tests(tests)
         manager._look_for_new_crash_logs(rs, time.time())
 
@@ -509,7 +508,7 @@ class ResultSummaryTest(unittest.TestCase):
     def get_result_summary(self, port, test_names, expectations_str):
         port.expectations_dict = lambda: {'': expectations_str}
         expectations = test_expectations.TestExpectations(port, test_names)
-        return test_names, result_summary.ResultSummary(expectations, test_names), expectations
+        return test_names, ResultSummary(expectations, test_names, 1, set()), expectations
 
     # FIXME: Use this to test more of summarize_results. This was moved from printing_unittest.py.
     def summarized_results(self, port, expected, passing, flaky, extra_tests=[], extra_expectations=None):
index b051212..5bb5010 100644 (file)
@@ -31,7 +31,7 @@ from webkitpy.layout_tests.models.test_expectations import TestExpectations, SKI
 
 
 class ResultSummary(object):
-    def __init__(self, expectations, test_files, iterations=1):
+    def __init__(self, expectations, test_files, iterations, expected_skips):
         self.total = len(test_files) * iterations
         self.remaining = self.total
         self.expectations = expectations
@@ -48,8 +48,8 @@ class ResultSummary(object):
         self.failures = {}
         self.total_failures = 0
         self.expected_skips = 0
-        self.total_tests_by_expectation[SKIP] = 0
-        self.tests_by_expectation[SKIP] = set()
+        self.total_tests_by_expectation[SKIP] = len(expected_skips)
+        self.tests_by_expectation[SKIP] = expected_skips
         for expectation in TestExpectations.EXPECTATIONS.values():
             self.tests_by_expectation[expectation] = set()
             self.total_tests_by_expectation[expectation] = 0
index 9c6d478..b94b832 100644 (file)
@@ -232,6 +232,10 @@ class TestExpectationParser(object):
         expectation_line = TestExpectationLine()
         expectation_line.original_string = test_name
         expectation_line.modifiers = [TestExpectationParser.DUMMY_BUG_MODIFIER, TestExpectationParser.SKIP_MODIFIER]
+        # FIXME: It's not clear what the expectations for a skipped test should be; the expectations
+        # might be different for different entries in a Skipped file, or from the command line, or from
+        # only running parts of the tests. It's also not clear if it matters much.
+        expectation_line.modifiers.append(TestExpectationParser.WONTFIX_MODIFIER)
         expectation_line.name = test_name
         # FIXME: we should pass in a more descriptive string here.
         expectation_line.filename = '<Skipped file>'
@@ -759,7 +763,7 @@ class TestExpectations(object):
                 self._expectations += expectations
 
         # FIXME: move ignore_tests into port.skipped_layout_tests()
-        self._add_skipped_tests(port.skipped_layout_tests(tests).union(set(port.get_option('ignore_tests', []))))
+        self.add_skipped_tests(port.skipped_layout_tests(tests).union(set(port.get_option('ignore_tests', []))))
 
         self._has_warnings = False
         self._report_warnings()
@@ -882,7 +886,7 @@ class TestExpectations(object):
             if self._is_lint_mode or self._test_config in expectation_line.matching_configurations:
                 self._model.add_expectation_line(expectation_line)
 
-    def _add_skipped_tests(self, tests_to_skip):
+    def add_skipped_tests(self, tests_to_skip):
         if not tests_to_skip:
             return
         for test in self._expectations:
index c780dac..2b4853f 100644 (file)
@@ -284,7 +284,7 @@ class SkippedTests(Base):
 
         # Check that the expectation is for BUG_DUMMY SKIP : ... = PASS
         self.assertEquals(exp.get_modifiers('failures/expected/text.html'),
-                          [TestExpectationParser.DUMMY_BUG_MODIFIER, TestExpectationParser.SKIP_MODIFIER])
+                          [TestExpectationParser.DUMMY_BUG_MODIFIER, TestExpectationParser.SKIP_MODIFIER, TestExpectationParser.WONTFIX_MODIFIER])
         self.assertEquals(exp.get_expectations('failures/expected/text.html'), set([PASS]))
 
     def test_skipped_tests_work(self):
index 95a07f5..c4f4f24 100755 (executable)
@@ -412,8 +412,8 @@ def parse_args(args=None):
         optparse.make_option("--exit-after-n-crashes-or-timeouts", type="int",
             default=None, help="Exit after the first N crashes instead of "
             "running all tests"),
-        optparse.make_option("--iterations", type="int", help="Number of times to run the set of tests (e.g. ABCABCABC)"),
-        optparse.make_option("--repeat-each", type="int", help="Number of times to run each test (e.g. AAABBBCCC)"),
+        optparse.make_option("--iterations", type="int", default=1, help="Number of times to run the set of tests (e.g. ABCABCABC)"),
+        optparse.make_option("--repeat-each", type="int", default=1, help="Number of times to run each test (e.g. AAABBBCCC)"),
         optparse.make_option("--retry-failures", action="store_true",
             default=True,
             help="Re-try any tests that produce unexpected results (default)"),
index 1c2fecd..637737e 100644 (file)
@@ -220,21 +220,18 @@ class Printer(object):
         self._print_config('Command line: ' + ' '.join(self._port.driver_cmd_line()))
         self._print_config('')
 
-    def print_expected(self, num_all_test_files, result_summary, tests_with_result_type_callback):
-        self._print_expected('Found %s.' % grammar.pluralize('test', num_all_test_files))
+    def print_found(self, num_all_test_files, num_to_run, repeat_each, iterations):
+        found_str = 'Found %s; running %d' % (grammar.pluralize('test', num_all_test_files), num_to_run)
+        if repeat_each * iterations > 1:
+            found_str += ', %d times each (--repeat-each=%d, --iterations=%d)' % (repeat_each * iterations, repeat_each, iterations)
+        self._print_expected(found_str + '.')
+
+    def print_expected(self, result_summary, tests_with_result_type_callback):
         self._print_expected_results_of_type(result_summary, test_expectations.PASS, "passes", tests_with_result_type_callback)
         self._print_expected_results_of_type(result_summary, test_expectations.FAIL, "failures", tests_with_result_type_callback)
         self._print_expected_results_of_type(result_summary, test_expectations.FLAKY, "flaky", tests_with_result_type_callback)
-        self._print_expected_results_of_type(result_summary, test_expectations.SKIP, "skipped", tests_with_result_type_callback)
         self._print_expected('')
 
-        if self._options.repeat_each > 1:
-            self._print_expected('Running each test %d times.' % self._options.repeat_each)
-        if self._options.iterations > 1:
-            self._print_expected('Running %d iterations of the tests.' % self._options.iterations)
-        if self._options.iterations > 1 or self._options.repeat_each > 1:
-            self._print_expected('')
-
     def print_workers_and_shards(self, num_workers, num_shards, num_locked_shards):
         driver_name = self._port.driver_name()
         if num_workers == 1:
@@ -447,7 +444,7 @@ class Printer(object):
         """
         failed = result_summary.total_failures
         total = result_summary.total - result_summary.expected_skips
-        passed = total - failed
+        passed = total - failed - result_summary.remaining
         pct_passed = 0.0
         if total > 0:
             pct_passed = float(passed) * 100 / total
@@ -460,6 +457,7 @@ class Printer(object):
             test_expectations.NOW, "Tests to be fixed")
 
         self.print_actual("")
+        # FIXME: We should be skipping anything marked WONTFIX, so we shouldn't bother logging these stats.
         self._print_result_summary_entry(result_summary,
             test_expectations.WONTFIX,
             "Tests that will only be fixed if they crash (WONTFIX)")
@@ -471,7 +469,7 @@ class Printer(object):
 
         Args:
           result_summary: summary to print results for
-          timeline: the timeline to print results for (NOT, WONTFIX, etc.)
+          timeline: the timeline to print results for (NOW, WONTFIX, etc.)
           heading: a textual description of the timeline
         """
         total = len(result_summary.tests_by_timeline[timeline])
@@ -481,7 +479,7 @@ class Printer(object):
         self.print_actual("=> %s (%d):" % (heading, not_passing))
 
         for result in TestExpectations.EXPECTATION_ORDER:
-            if result == test_expectations.PASS:
+            if result in (test_expectations.PASS, test_expectations.SKIP):
                 continue
             results = (result_summary.tests_by_expectation[result] &
                        result_summary.tests_by_timeline[timeline])
index f8dd61d..7505d5c 100644 (file)
@@ -130,7 +130,7 @@ class  Testprinter(unittest.TestCase):
         port.test_expectations_overrides = lambda: None
         expectations = test_expectations.TestExpectations(self._port, test_names)
 
-        rs = result_summary.ResultSummary(expectations, test_names)
+        rs = result_summary.ResultSummary(expectations, test_names, 1, set())
         return test_names, rs, expectations
 
     def test_help_printer(self):