Remove _lines_to_ignore_in_stderr, which was only used in Chromium port
Tools/Scripts/webkitpy/performance_tests/perftest.py
# Copyright (C) 2012 Google Inc. All rights reserved.
# Copyright (C) 2012 Zoltan Horvath, Adobe Systems Incorporated. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import errno
import logging
import math
import os
import re
import signal
import socket
import subprocess
import sys
import time

# Import for auto-install
if sys.platform not in ('cygwin', 'win32'):
    # FIXME: webpagereplay doesn't work on win32. See https://bugs.webkit.org/show_bug.cgi?id=88279.
    import webkitpy.thirdparty.autoinstalled.webpagereplay.replay

from webkitpy.layout_tests.controllers.test_result_writer import TestResultWriter
from webkitpy.port.driver import DriverInput
from webkitpy.port.driver import DriverOutput

DEFAULT_TEST_RUNNER_COUNT = 4

_log = logging.getLogger(__name__)


class PerfTestMetric(object):
    def __init__(self, metric, unit=None, iterations=None):
        # FIXME: Fix runner.js to report correct metric names
        self._iterations = iterations or []
        self._unit = unit or self.metric_to_unit(metric)
        self._metric = self.time_unit_to_metric(self._unit) if metric == 'Time' else metric

    def name(self):
        return self._metric

    def has_values(self):
        return bool(self._iterations)

    def append_group(self, group_values):
        assert isinstance(group_values, list)
        self._iterations.append(group_values)

    def grouped_iteration_values(self):
        return self._iterations

    def flattened_iteration_values(self):
        return [value for group_values in self._iterations for value in group_values]

    def unit(self):
        return self._unit

    @staticmethod
    def metric_to_unit(metric):
        assert metric in ('Time', 'Malloc', 'JSHeap')
        return 'ms' if metric == 'Time' else 'bytes'

    @staticmethod
    def time_unit_to_metric(unit):
        return {'fps': 'FrameRate', 'runs/s': 'Runs', 'ms': 'Time'}[unit]

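# A minimal usage sketch for PerfTestMetric (illustrative, not part of the
# original file). Each append_group() call records the values from one driver
# run; grouped and flattened views of the same data are both available:
#
#   metric = PerfTestMetric('Time')       # unit defaults to 'ms'
#   metric.append_group([1.0, 2.0])       # first run
#   metric.append_group([3.0])            # second run
#   metric.grouped_iteration_values()     # [[1.0, 2.0], [3.0]]
#   metric.flattened_iteration_values()   # [1.0, 2.0, 3.0]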

class PerfTest(object):

    def __init__(self, port, test_name, test_path, test_runner_count=DEFAULT_TEST_RUNNER_COUNT):
        self._port = port
        self._test_name = test_name
        self._test_path = test_path
        self._description = None
        self._metrics = {}
        self._ordered_metrics_name = []
        self._test_runner_count = test_runner_count

    def test_name(self):
        return self._test_name

    def test_name_without_file_extension(self):
        return re.sub(r'\.\w+$', '', self.test_name())

    def test_path(self):
        return self._test_path

    def description(self):
        return self._description

    def prepare(self, time_out_ms):
        return True

    def _create_driver(self):
        return self._port.create_driver(worker_number=0, no_timeout=True)

    def run(self, time_out_ms):
        for _ in xrange(self._test_runner_count):
            driver = self._create_driver()
            try:
                if not self._run_with_driver(driver, time_out_ms):
                    return None
            finally:
                driver.stop()

        should_log = not self._port.get_option('profile')
        if should_log and self._description:
            _log.info('DESCRIPTION: %s' % self._description)

        results = {}
        for metric_name in self._ordered_metrics_name:
            metric = self._metrics[metric_name]
            results[metric.name()] = metric.grouped_iteration_values()
            if should_log:
                legacy_chromium_bot_compatible_name = self.test_name_without_file_extension().replace('/', ': ')
                self.log_statistics(legacy_chromium_bot_compatible_name + ': ' + metric.name(),
                    metric.flattened_iteration_values(), metric.unit())

        return results

    @staticmethod
    def log_statistics(test_name, values, unit):
        sorted_values = sorted(values)

        # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
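        # Each step updates the running mean and the sum of squared deviations
        # (Welford's formulation): with x_i the i-th value,
        #   mean_i = mean_{i-1} + (x_i - mean_{i-1}) / i
        #   M2_i   = M2_{i-1} + (x_i - mean_{i-1}) * (x_i - mean_i)
        # so that the sample variance is M2_n / (n - 1).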
        square_sum = 0
        mean = 0
        # Note: 'value' avoids shadowing the 'time' module imported above.
        for i, value in enumerate(sorted_values):
            delta = value - mean
            sweep = i + 1.0
            mean += delta / sweep
            square_sum += delta * (value - mean)

        middle = int(len(sorted_values) / 2)
        # float() guards against integer truncation when all values are ints.
        mean = sum(sorted_values) / float(len(sorted_values))
        median = sorted_values[middle] if len(sorted_values) % 2 else (sorted_values[middle - 1] + sorted_values[middle]) / 2.0
        stdev = math.sqrt(square_sum / (len(sorted_values) - 1)) if len(sorted_values) > 1 else 0

        _log.info('RESULT %s= %s %s' % (test_name, mean, unit))
        _log.info('median= %s %s, stdev= %s %s, min= %s %s, max= %s %s' %
            (median, unit, stdev, unit, sorted_values[0], unit, sorted_values[-1], unit))
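    # Illustrative example of the lines log_statistics() emits (the values and
    # test name are hypothetical):
    #   RESULT Parser: html5-full-render: Time= 1234.5 ms
    #   median= 1230.0 ms, stdev= 10.2 ms, min= 1220.0 ms, max= 1250.0 ms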

    _description_regex = re.compile(r'^Description: (?P<description>.*)$', re.IGNORECASE)
    _metrics_regex = re.compile(r'^(?P<metric>Time|Malloc|JS Heap):')
    _statistics_keys = ['avg', 'median', 'stdev', 'min', 'max', 'unit', 'values']
    _score_regex = re.compile(r'^(?P<key>' + r'|'.join(_statistics_keys) + r')\s+(?P<value>([0-9\.]+(,\s+)?)+)\s*(?P<unit>.*)')
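    # Sketch of the test output these regexes parse (illustrative; the exact
    # lines are produced by the harness's runner.js). Only 'values' lines
    # contribute data; the other statistics lines are matched and skipped:
    #   Description: This tests the foo operation.
    #   Time:
    #   values 1.2, 1.3, 1.1 ms
    #   avg 1.2 ms
    #   median 1.2 ms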

    def _run_with_driver(self, driver, time_out_ms):
        output = self.run_single(driver, self.test_path(), time_out_ms)
        self._filter_output(output)
        if self.run_failed(output):
            return False

        current_metric = None
        for line in re.split('\n', output.text):
            description_match = self._description_regex.match(line)
            metric_match = self._metrics_regex.match(line)
            score = self._score_regex.match(line)

            if description_match:
                self._description = description_match.group('description')
            elif metric_match:
                current_metric = metric_match.group('metric').replace(' ', '')
            elif score:
                if score.group('key') != 'values':
                    continue

                metric = self._ensure_metrics(current_metric, score.group('unit'))
                metric.append_group([float(value) for value in score.group('value').split(', ')])
            else:
                _log.error('ERROR: ' + line)
                return False

        return True

    def _ensure_metrics(self, metric_name, unit=None):
        if metric_name not in self._metrics:
            self._metrics[metric_name] = PerfTestMetric(metric_name, unit)
            self._ordered_metrics_name.append(metric_name)
        return self._metrics[metric_name]

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        return driver.run_test(DriverInput(test_path, time_out_ms, image_hash=None, should_run_pixel_test=should_run_pixel_test), stop_when_done=False)

    def run_failed(self, output):
        if output.text is None or output.error:
            pass
        elif output.timeout:
            _log.error('timeout: %s' % self.test_name())
        elif output.crash:
            _log.error('crash: %s' % self.test_name())
        else:
            return False

        if output.error:
            _log.error('error: %s\n%s' % (self.test_name(), output.error))

        return True

    @staticmethod
    def _should_ignore_line(regexps, line):
        if not line:
            return True
        for regexp in regexps:
            if regexp.search(line):
                return True
        return False

    _lines_to_ignore_in_parser_result = [
        re.compile(r'^Running \d+ times$'),
        re.compile(r'^Ignoring warm-up '),
        re.compile(r'^Info:'),
        re.compile(r'^\d+(\.\d+)?(\s*(runs\/s|ms|fps))?$'),
        # The following handle existing tests such as Dromaeo
        re.compile(re.escape("""main frame - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->-->" - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->/<!--frame0-->-->" - has 1 onunload handler(s)""")),
        # The following is for html5.html
        re.compile(re.escape("""Blocked access to external URL http://www.whatwg.org/specs/web-apps/current-work/""")),
        re.compile(r"CONSOLE MESSAGE: (line \d+: )?Blocked script execution in '[A-Za-z0-9\-\.:]+' because the document's frame is sandboxed and the 'allow-scripts' permission is not set."),
        re.compile(r"CONSOLE MESSAGE: (line \d+: )?Not allowed to load local resource"),
        # Dromaeo reports values for subtests. Ignore them for now.
        re.compile(r'(?P<name>.+): \[(?P<values>(\d+(\.\d+)?,\s+)*\d+(\.\d+)?)\]'),
    ]

    def _filter_output(self, output):
        if output.text:
            output.text = '\n'.join([line for line in re.split('\n', output.text) if not self._should_ignore_line(self._lines_to_ignore_in_parser_result, line)])


class SingleProcessPerfTest(PerfTest):
    def __init__(self, port, test_name, test_path, test_runner_count=1):
        super(SingleProcessPerfTest, self).__init__(port, test_name, test_path, test_runner_count)


class ReplayServer(object):
    def __init__(self, archive, record):
        self._process = None

        # FIXME: Should error if local proxy isn't set to forward requests to localhost:8080 and localhost:8443

        replay_path = webkitpy.thirdparty.autoinstalled.webpagereplay.replay.__file__
        args = ['python', replay_path, '--no-dns_forwarding', '--port', '8080', '--ssl_port', '8443', '--use_closest_match', '--log_level', 'warning']
        if record:
            args.append('--record')
        args.append(archive)

        self._process = subprocess.Popen(args)

    def wait_until_ready(self):
        for _ in range(3):
            try:
                connection = socket.create_connection(('localhost', 8080), timeout=1)
                connection.close()
                return True
            except socket.error:
                time.sleep(1)
                continue
        return False

    def stop(self):
        if self._process:
            self._process.send_signal(signal.SIGINT)
            self._process.wait()
        self._process = None

    def __del__(self):
        self.stop()

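# A minimal usage sketch for ReplayServer (illustrative; the archive path is
# hypothetical):
#
#   server = ReplayServer('/tmp/example.wpr', record=False)
#   try:
#       if server.wait_until_ready():
#           pass  # Drive page loads through the proxy on localhost:8080/8443.
#   finally:
#       server.stop()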

class ReplayPerfTest(PerfTest):
    _FORCE_GC_FILE = 'resources/force-gc.html'

    def __init__(self, port, test_name, test_path, test_runner_count=DEFAULT_TEST_RUNNER_COUNT):
        super(ReplayPerfTest, self).__init__(port, test_name, test_path, test_runner_count)
        self.force_gc_test = self._port.host.filesystem.join(self._port.perf_tests_dir(), self._FORCE_GC_FILE)

    def _start_replay_server(self, archive, record):
        try:
            return ReplayServer(archive, record)
        except OSError as error:
            if error.errno == errno.ENOENT:
                _log.error("Replay tests require web-page-replay.")
            else:
                # Bare 'raise' preserves the original traceback, unlike 'raise error'.
                raise

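    # A .replay test is a text file whose first line is the URL to load (see
    # prepare() below). Illustrative example of such a file's contents:
    #
    #   http://www.example.com/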
    def prepare(self, time_out_ms):
        filesystem = self._port.host.filesystem
        path_without_ext = filesystem.splitext(self.test_path())[0]

        self._archive_path = filesystem.join(path_without_ext + '.wpr')
        self._expected_image_path = filesystem.join(path_without_ext + '-expected.png')
        self._url = filesystem.read_text_file(self.test_path()).split('\n')[0]

        if filesystem.isfile(self._archive_path) and filesystem.isfile(self._expected_image_path):
            _log.info("Replay ready for %s" % self._archive_path)
            return True

        _log.info("Preparing replay for %s" % self.test_name())

        driver = self._port.create_driver(worker_number=0, no_timeout=True)
        try:
            output = self.run_single(driver, self._archive_path, time_out_ms, record=True)
        finally:
            driver.stop()

        if not output or not filesystem.isfile(self._archive_path):
            _log.error("Failed to prepare a replay for %s" % self.test_name())
            return False

        _log.info("Prepared replay for %s" % self.test_name())

        return True

    def _run_with_driver(self, driver, time_out_ms):
        times = []
        malloc = []
        js_heap = []

        for i in range(6):
            output = self.run_single(driver, self.test_path(), time_out_ms)
            if not output or self.run_failed(output):
                return False
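            # The first of the six runs is a warm-up; its results are discarded.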
            if i == 0:
                continue

            times.append(output.test_time * 1000)

            if not output.measurements:
                continue

            for metric, result in output.measurements.items():
                assert metric in ('Malloc', 'JSHeap')
                if metric == 'Malloc':
                    malloc.append(result)
                else:
                    js_heap.append(result)

        if times:
            self._ensure_metrics('Time').append_group(times)
        if malloc:
            self._ensure_metrics('Malloc').append_group(malloc)
        if js_heap:
            self._ensure_metrics('JSHeap').append_group(js_heap)

        return True

    def run_single(self, driver, url, time_out_ms, record=False):
        server = self._start_replay_server(self._archive_path, record)
        if not server:
            _log.error("Web page replay didn't start.")
            return None

        try:
            _log.debug("Waiting for Web page replay to start.")
            if not server.wait_until_ready():
                _log.error("Web page replay didn't become ready.")
                return None

            _log.debug("Web page replay started. Loading the page.")
            # Force GC to prevent pageload noise. See https://bugs.webkit.org/show_bug.cgi?id=98203
            super(ReplayPerfTest, self).run_single(driver, self.force_gc_test, time_out_ms, should_run_pixel_test=False)
            output = super(ReplayPerfTest, self).run_single(driver, self._url, time_out_ms, should_run_pixel_test=True)
            if self.run_failed(output):
                return None

            if not output.image:
                _log.error("Loading the page did not generate image results")
                _log.error(output.text)
                return None

            filesystem = self._port.host.filesystem
            dirname = filesystem.dirname(self._archive_path)
            filename = filesystem.split(self._archive_path)[1]
            writer = TestResultWriter(filesystem, self._port, dirname, filename)
            if record:
                writer.write_image_files(actual_image=None, expected_image=output.image)
            else:
                writer.write_image_files(actual_image=output.image, expected_image=None)

            return output
        finally:
            server.stop()


class PerfTestFactory(object):

    _pattern_map = [
        (re.compile(r'^Dromaeo/'), SingleProcessPerfTest),
        (re.compile(r'(.+)\.replay$'), ReplayPerfTest),
    ]

    @classmethod
    def create_perf_test(cls, port, test_name, path, test_runner_count=DEFAULT_TEST_RUNNER_COUNT):
        for (pattern, test_class) in cls._pattern_map:
            if pattern.match(test_name):
                return test_class(port, test_name, path, test_runner_count)
        return PerfTest(port, test_name, path, test_runner_count)
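
# Illustrative dispatch examples for PerfTestFactory (test names are
# hypothetical):
#   create_perf_test(port, 'Dromaeo/dom-attr.html', path)       -> SingleProcessPerfTest
#   create_perf_test(port, 'Loading/example.com.replay', path)  -> ReplayPerfTest
#   create_perf_test(port, 'Parser/tiny-innerHTML.html', path)  -> PerfTest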