057a736db2ba5ce997268b1a40b673846512ac67
[WebKit-https.git] / Tools / Scripts / webkitpy / performance_tests / perftest.py
1 # Copyright (C) 2012 Google Inc. All rights reserved.
2 # Copyright (C) 2012 Zoltan Horvath, Adobe Systems Incorporated. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30
31 import errno
32 import logging
33 import math
34 import re
35 import os
36 import signal
37 import socket
38 import subprocess
39 import sys
40 import time
41
42 # Import for auto-install
43 if sys.platform not in ('cygwin', 'win32'):
44     # FIXME: webpagereplay doesn't work on win32. See https://bugs.webkit.org/show_bug.cgi?id=88279.
45     import webkitpy.thirdparty.autoinstalled.webpagereplay.replay
46
47 from webkitpy.layout_tests.controllers.test_result_writer import TestResultWriter
48 from webkitpy.layout_tests.port.driver import DriverInput
49 from webkitpy.layout_tests.port.driver import DriverOutput
50
51
# Module-level logger, named after this module, used by the classes below
# to report results, failures and replay-server status.
_log = logging.getLogger(__name__)
53
54
class PerfTestMetric(object):
    """A named series of samples collected for one metric of a performance test.

    The metric name is one of 'Time', 'Malloc' or 'JSHeap' whenever the unit
    has to be derived from it (see metric_to_unit); samples are accumulated
    with append() and summarized with to_dict().
    """

    def __init__(self, metric, unit=None, iterations=None):
        """Create a metric.

        Args:
            metric: Name of the metric (e.g. 'Time').
            unit: Unit string; when falsy it is derived from the metric name.
            iterations: Optional initial list of samples. The list is stored
                as given (not copied), so later append() calls extend it.
        """
        self._metric = metric
        self._iterations = iterations or []
        self._unit = unit or self.metric_to_unit(metric)

    def metric(self):
        """Return the metric name passed to the constructor."""
        return self._metric

    def has_values(self):
        """Return True when at least one sample has been recorded."""
        return bool(self._iterations)

    # FIXME: We don't need to support this anymore. Make outputs more human friendly.
    def legacy_chromium_bot_compatible_test_name(self, test_name_with_extension):
        """Return the result key for this metric.

        The trailing file extension is stripped from the test name; for any
        metric other than 'Time' the metric name is appended after a colon.
        """
        test_name = re.sub(r'\.\w+$', '', test_name_with_extension)
        return test_name if self._metric == 'Time' else test_name + ':' + self._metric

    def append(self, value):
        """Record one more sample."""
        self._iterations.append(value)

    def to_dict(self):
        """Return the statistics of the recorded samples plus 'unit' and 'values'.

        Must only be called once at least one sample has been recorded.
        """
        assert self.has_values()
        statistics = self.compute_statistics(self._iterations)
        statistics['unit'] = self._unit
        statistics['values'] = self._iterations
        return statistics

    @classmethod
    def metric_to_unit(cls, metric):
        """Map a known metric name to its unit: 'ms' for 'Time', 'bytes' otherwise."""
        assert metric in ('Time', 'Malloc', 'JSHeap')
        return 'ms' if metric == 'Time' else 'bytes'

    @staticmethod
    def compute_statistics(values):
        """Compute summary statistics for a collection of numeric samples.

        Args:
            values: Non-empty iterable of numbers.

        Returns:
            A dict with the keys 'avg', 'min', 'max', 'median' and 'stdev'
            (sample standard deviation; 0 when there is a single sample).

        Raises:
            ValueError: if values is empty.
        """
        sorted_values = sorted(values)
        count = len(sorted_values)
        if not count:
            raise ValueError('compute_statistics() requires at least one value')

        # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
        squareSum = 0
        mean = 0
        for i, time in enumerate(sorted_values):
            delta = time - mean
            sweep = i + 1.0
            mean += delta / sweep
            squareSum += delta * (time - mean)

        middle = count // 2
        if count % 2:
            median = sorted_values[middle]
        else:
            # Divide by a float so integer samples are not floored on Python 2.
            median = (sorted_values[middle - 1] + sorted_values[middle]) / 2.0

        return {
            # float() keeps the average exact for integer samples on Python 2.
            'avg': float(sum(sorted_values)) / count,
            'min': sorted_values[0],
            'max': sorted_values[-1],
            'median': median,
            'stdev': math.sqrt(squareSum / (count - 1)) if count > 1 else 0,
        }
107
108
class PerfTest(object):
    """Runs one performance test through a port driver and parses its output.

    The test page is expected to print lines such as "Time:" followed by
    "values 1, 2, 3 ms"; parse_output() turns those into PerfTestMetric
    objects and run() returns a dict of their statistics keyed by the
    legacy result name.
    """

    def __init__(self, port, test_name, test_path):
        self._port = port
        self._test_name = test_name
        self._test_path = test_path
        # Set by parse_output() when the test prints a "Description:" line.
        self._description = None

    def test_name(self):
        """Return the test name given to the constructor."""
        return self._test_name

    def test_path(self):
        """Return the test path given to the constructor."""
        return self._test_path

    def description(self):
        """Return the description parsed from the last output, or None."""
        return self._description

    def prepare(self, time_out_ms):
        """Hook for subclasses that need setup before run(); always succeeds here."""
        return True

    def _create_driver(self):
        return self._port.create_driver(worker_number=0, no_timeout=True)

    def run(self, time_out_ms):
        """Run the test and return {result_name: statistics dict}.

        Returns whatever falsy value _run_with_driver() produced (None on
        failure) when no metrics were collected. The driver is always
        stopped, even when the run raises.
        """
        driver = self._create_driver()
        try:
            metrics = self._run_with_driver(driver, time_out_ms)
        finally:
            driver.stop()

        if not metrics:
            return metrics

        results = {}
        for metric in metrics:
            legacy_test_name = metric.legacy_chromium_bot_compatible_test_name(self.test_name())
            results[legacy_test_name] = metric.to_dict()

        # Statistics are not logged when the 'profile' option is set.
        if not self._port.get_option('profile'):
            if self._description:
                _log.info('DESCRIPTION: %s' % self._description)
            for result_name in sorted(results.keys()):
                self.output_statistics(result_name, results[result_name])

        return results

    def _run_with_driver(self, driver, time_out_ms):
        """Run the test once; return a list of PerfTestMetric, or None on failure."""
        output = self.run_single(driver, self.test_path(), time_out_ms)
        self._filter_output(output)
        if self.run_failed(output):
            return None

        return self.parse_output(output)

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        """Load test_path in the driver once and return the driver's output."""
        return driver.run_test(DriverInput(test_path, time_out_ms, image_hash=None, should_run_pixel_test=should_run_pixel_test), stop_when_done=False)

    def run_failed(self, output):
        """Return True (after logging the reason) when the driver output indicates a failure.

        A run fails when it produced no text, reported an error, timed out or
        crashed. Missing text and errors take precedence over timeout/crash.
        """
        if output.text is None or output.error:
            pass
        elif output.timeout:
            _log.error('timeout: %s' % self.test_name())
        elif output.crash:
            _log.error('crash: %s' % self.test_name())
        else:
            return False

        if output.error:
            _log.error('error: %s\n%s' % (self.test_name(), output.error))

        return True

    def _should_ignore_line(self, regexps, line):
        """Return True for empty lines and lines matched by any of regexps."""
        if not line:
            return True
        for regexp in regexps:
            if regexp.search(line):
                return True
        return False

    _lines_to_ignore_in_stderr = [
        re.compile(r'^Unknown option:'),
        re.compile(r'^\[WARNING:proxy_service.cc'),
        re.compile(r'^\[INFO:'),
    ]

    def _should_ignore_line_in_stderr(self, line):
        return self._should_ignore_line(self._lines_to_ignore_in_stderr, line)

    _lines_to_ignore_in_parser_result = [
        re.compile(r'^Running \d+ times$'),
        re.compile(r'^Ignoring warm-up '),
        re.compile(r'^Info:'),
        re.compile(r'^\d+(.\d+)?(\s*(runs\/s|ms|fps))?$'),
        # Following are for handle existing test like Dromaeo
        re.compile(re.escape("""main frame - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->-->" - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->/<!--frame0-->-->" - has 1 onunload handler(s)""")),
        # Following is for html5.html
        re.compile(re.escape("""Blocked access to external URL http://www.whatwg.org/specs/web-apps/current-work/""")),
        # Following is for Parser/html-parser.html
        re.compile(re.escape("""CONSOLE MESSAGE: Blocked script execution in 'html-parser.html' because the document's frame is sandboxed and the 'allow-scripts' permission is not set.""")),
        # Dromaeo reports values for subtests. Ignore them for now.
        re.compile(r'(?P<name>.+): \[(?P<values>(\d+(.\d+)?,\s+)*\d+(.\d+)?)\]'),
    ]

    def _should_ignore_line_in_parser_test_result(self, line):
        return self._should_ignore_line(self._lines_to_ignore_in_parser_result, line)

    def _filter_output(self, output):
        """Strip known-noise lines from output.error and output.text in place.

        output.error becomes None when nothing is left after filtering.
        """
        if output.error:
            filtered_error = '\n'.join([line for line in re.split('\n', output.error) if not self._should_ignore_line_in_stderr(line)])
            output.error = filtered_error if filtered_error else None
        if output.text:
            output.text = '\n'.join([line for line in re.split('\n', output.text) if not self._should_ignore_line_in_parser_test_result(line)])

    _description_regex = re.compile(r'^Description: (?P<description>.*)$', re.IGNORECASE)
    _metrics_regex = re.compile(r'^(?P<metric>Time|Malloc|JS Heap):')
    _statistics_keys = ['avg', 'median', 'stdev', 'min', 'max', 'unit', 'values']
    _score_regex = re.compile(r'^(?P<key>' + r'|'.join(_statistics_keys) + r')\s+(?P<value>([0-9\.]+(,\s+)?)+)\s*(?P<unit>.*)')
    # Same separator that _score_regex accepts between the numbers of a value list.
    _value_separator_regex = re.compile(r',\s+')

    def parse_output(self, output):
        """Parse the filtered test output into a list of PerfTestMetric.

        Only "values" lines create metrics; other statistics lines are
        accepted but ignored. Any unrecognized non-empty line is logged as an
        error and makes the whole parse return None (remaining lines are
        still scanned so every offending line gets logged).
        """
        current_metric = None
        results = []
        for line in re.split('\n', output.text):
            if not line:
                continue

            description_match = self._description_regex.match(line)
            metric_match = self._metrics_regex.match(line)
            score = self._score_regex.match(line)

            if description_match:
                self._description = description_match.group('description')
            elif metric_match:
                # "JS Heap" is reported with a space but keyed as "JSHeap".
                current_metric = metric_match.group('metric').replace(' ', '')
            elif score:
                key = score.group('key')
                if key == 'values' and results is not None:
                    # Split with the separator the regex allows (any whitespace
                    # after the comma) and drop the empty token left by a
                    # trailing separator, so every line the regex accepts parses.
                    numbers = self._value_separator_regex.split(score.group('value'))
                    values = [float(number) for number in numbers if number]
                    results.append(PerfTestMetric(current_metric, score.group('unit'), values))
            else:
                results = None
                _log.error('ERROR: ' + line)

        return results

    def output_statistics(self, test_name, results):
        """Log the average on a RESULT line, then median/stdev/min/max on a second line."""
        unit = results['unit']
        _log.info('RESULT %s= %s %s' % (test_name.replace(':', ': ').replace('/', ': '), results['avg'], unit))
        _log.info(', '.join(['%s= %s %s' % (key, results[key], unit) for key in self._statistics_keys[1:5]]))
259
260
class ChromiumStylePerfTest(PerfTest):
    """Perf test whose page prints Chromium-style "RESULT name= value unit" lines."""

    _chromium_style_result_regex = re.compile(r'^RESULT\s+(?P<name>[^=]+)\s*=\s+(?P<value>\d+(\.\d+)?)\s*(?P<unit>\w+)$')

    def __init__(self, port, test_name, test_path):
        super(ChromiumStylePerfTest, self).__init__(port, test_name, test_path)

    def run(self, time_out_ms):
        """Run the test once; return the parsed results dict, or None on failure."""
        driver = self._create_driver()
        try:
            output = self.run_single(driver, self.test_path(), time_out_ms)
        finally:
            # The driver is stopped whether or not the run raised.
            driver.stop()

        self._filter_output(output)
        if not self.run_failed(output):
            return self.parse_and_log_output(output)
        return None

    def parse_and_log_output(self, output):
        """Collect every RESULT line into {"<test name>:<result name>": value}.

        Each RESULT line is logged at info level. Any other non-empty line is
        logged as an error and makes the method return None, as does an
        output without a single RESULT line.
        """
        scores = {}
        saw_unexpected_line = False
        prefix = self.test_name() + ':'
        for line in re.split('\n', output.text):
            match = ChromiumStylePerfTest._chromium_style_result_regex.match(line)
            if not match:
                if len(line) != 0:
                    saw_unexpected_line = True
                    _log.error(line)
                continue
            # FIXME: Store the unit
            result_name = match.group('name').replace(' ', '')
            scores[prefix + result_name] = float(match.group('value'))
            _log.info(line)

        if saw_unexpected_line or not scores:
            return None
        return scores
293
294
class PageLoadingPerfTest(PerfTest):
    """Perf test that measures how long a page takes to load, over repeated loads."""

    _FORCE_GC_FILE = 'resources/force-gc.html'

    def __init__(self, port, test_name, test_path):
        super(PageLoadingPerfTest, self).__init__(port, test_name, test_path)
        self.force_gc_test = self._port.host.filesystem.join(self._port.perf_tests_dir(), self._FORCE_GC_FILE)

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        """Load the force-GC page, then load test_path and return its output."""
        # Force GC to prevent pageload noise. See https://bugs.webkit.org/show_bug.cgi?id=98203
        super(PageLoadingPerfTest, self).run_single(driver, self.force_gc_test, time_out_ms, False)
        return super(PageLoadingPerfTest, self).run_single(driver, test_path, time_out_ms, should_run_pixel_test)

    def _run_with_driver(self, driver, time_out_ms):
        """Load the page 20 times and collect load time and memory measurements.

        Returns:
            A list with the PerfTestMetric objects ('Time', 'Malloc',
            'JSHeap') that received at least one sample, or None as soon as
            any single load fails.
        """
        times = PerfTestMetric('Time')
        malloc = PerfTestMetric('Malloc')
        js_heap = PerfTestMetric('JSHeap')

        for i in range(0, 20):
            output = self.run_single(driver, self.test_path(), time_out_ms)
            if not output or self.run_failed(output):
                return None
            # The first load is still checked for failure but its numbers are discarded.
            if i == 0:
                continue

            # output.test_time is multiplied by 1000 to match the 'ms' unit of
            # the Time metric (presumably it is reported in seconds).
            times.append(output.test_time * 1000)
            if not output.measurements:
                continue

            for metric, result in output.measurements.items():
                assert metric == 'Malloc' or metric == 'JSHeap'
                if metric == 'Malloc':
                    malloc.append(result)
                else:
                    js_heap.append(result)

        # Build a real list: on Python 3 filter() returns a lazy iterator that
        # is always truthy, which would defeat the caller's "if not metrics" check.
        return [metric for metric in (times, malloc, js_heap) if metric.has_values()]
331
332
class ReplayServer(object):
    """Owns a web-page-replay subprocess that serves (or records) an archive."""

    def __init__(self, archive, record):
        """Launch the replay script on ports 8080/8443.

        Args:
            archive: Path of the archive file handed to the replay script.
            record: When true, '--record' is passed to the replay script.
        """
        self._process = None

        # FIXME: Should error if local proxy isn't set to forward requests to localhost:8080 and localhost:8443

        command = [
            'python',
            webkitpy.thirdparty.autoinstalled.webpagereplay.replay.__file__,
            '--no-dns_forwarding',
            '--port', '8080',
            '--ssl_port', '8443',
            '--use_closest_match',
            '--log_level', 'warning',
        ]
        if record:
            command += ['--record']
        command += [archive]

        self._process = subprocess.Popen(command)

    def wait_until_ready(self):
        """Try up to three times to connect to localhost:8080.

        Returns True on the first successful connection; sleeps one second
        after each failed attempt and returns False when all attempts fail.
        """
        attempts_left = 3
        while attempts_left > 0:
            attempts_left -= 1
            try:
                socket.create_connection(('localhost', '8080'), timeout=1).close()
            except socket.error:
                time.sleep(1)
            else:
                return True
        return False

    def stop(self):
        """Send SIGINT to the subprocess, wait for it to exit, and forget it."""
        process = self._process
        if process:
            process.send_signal(signal.SIGINT)
            process.wait()
        self._process = None

    def __del__(self):
        # Make sure the subprocess does not outlive this object.
        self.stop()
366
367
class ReplayPerfTest(PageLoadingPerfTest):
    """Page-loading perf test that serves the page from a web-page-replay archive.

    prepare() must be called before run(): it computes the archive path,
    the expected image path and the URL that run_single() relies on.
    """

    def __init__(self, port, test_name, test_path):
        super(ReplayPerfTest, self).__init__(port, test_name, test_path)

    def _start_replay_server(self, archive, record):
        """Start a ReplayServer; return None (after logging) when the replay tool is missing.

        Raises:
            OSError: any launch failure other than ENOENT is propagated.
        """
        try:
            return ReplayServer(archive, record)
        except OSError as error:
            if error.errno != errno.ENOENT:
                # Bare raise keeps the original traceback.
                raise
            _log.error("Replay tests require web-page-replay.")
            return None

    def prepare(self, time_out_ms):
        """Make sure a replay archive and expected image exist for this test.

        The first line of the test file is used as the URL to load. When the
        archive or the expected image is missing, the page is loaded once in
        record mode to create them.

        Returns:
            True when the replay is ready, False when recording failed.
        """
        filesystem = self._port.host.filesystem
        path_without_ext = filesystem.splitext(self.test_path())[0]

        self._archive_path = filesystem.join(path_without_ext + '.wpr')
        self._expected_image_path = filesystem.join(path_without_ext + '-expected.png')
        self._url = filesystem.read_text_file(self.test_path()).split('\n')[0]

        if filesystem.isfile(self._archive_path) and filesystem.isfile(self._expected_image_path):
            _log.info("Replay ready for %s" % self._archive_path)
            return True

        _log.info("Preparing replay for %s" % self.test_name())

        # Use the same driver factory hook as run() so both paths stay consistent.
        driver = self._create_driver()
        try:
            output = self.run_single(driver, self._archive_path, time_out_ms, record=True)
        finally:
            driver.stop()

        if not output or not filesystem.isfile(self._archive_path):
            _log.error("Failed to prepare a replay for %s" % self.test_name())
            return False

        _log.info("Prepared replay for %s" % self.test_name())

        return True

    def run_single(self, driver, url, time_out_ms, record=False):
        """Load the prepared URL through a replay server and save the resulting image.

        The url argument is not used; the URL read by prepare() is loaded
        instead. The image is written as the expected image in record mode
        and as the actual image otherwise.

        Returns:
            The driver output, or None when the replay server did not start,
            the load failed, or no image was produced. The replay server is
            always stopped before returning.
        """
        server = self._start_replay_server(self._archive_path, record)
        if not server:
            _log.error("Web page replay didn't start.")
            return None

        try:
            _log.debug("Waiting for Web page replay to start.")
            if not server.wait_until_ready():
                _log.error("Web page replay didn't start.")
                return None

            _log.debug("Web page replay started. Loading the page.")
            output = super(ReplayPerfTest, self).run_single(driver, self._url, time_out_ms, should_run_pixel_test=True)
            if self.run_failed(output):
                return None

            if not output.image:
                _log.error("Loading the page did not generate image results")
                _log.error(output.text)
                return None

            filesystem = self._port.host.filesystem
            dirname = filesystem.dirname(self._archive_path)
            filename = filesystem.split(self._archive_path)[1]
            writer = TestResultWriter(filesystem, self._port, dirname, filename)
            if record:
                writer.write_image_files(actual_image=None, expected_image=output.image)
            else:
                writer.write_image_files(actual_image=output.image, expected_image=None)

            return output
        finally:
            server.stop()
443
444
class PerfTestFactory(object):
    """Chooses the PerfTest subclass to instantiate from the test name."""

    # Ordered (pattern, class) pairs; the first pattern matching the start of
    # the test name wins.
    _pattern_map = [
        (re.compile(r'^inspector/'), ChromiumStylePerfTest),
        (re.compile(r'(.+)\.replay$'), ReplayPerfTest),
    ]

    @classmethod
    def create_perf_test(cls, port, test_name, path):
        """Return a test object for test_name, falling back to a plain PerfTest."""
        candidates = (klass for regex, klass in cls._pattern_map if regex.match(test_name))
        chosen_class = next(candidates, PerfTest)
        return chosen_class(port, test_name, path)