Tools/Scripts/webkitpy/performance_tests/perftest.py
# Copyright (C) 2012 Google Inc. All rights reserved.
# Copyright (C) 2012 Zoltan Horvath, Adobe Systems Incorporated. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import errno
import logging
import math
import re
import os
import signal
import socket
import subprocess
import sys
import time

# Import for auto-install
if sys.platform not in ('cygwin', 'win32'):
    # FIXME: webpagereplay doesn't work on win32. See https://bugs.webkit.org/show_bug.cgi?id=88279.
    import webkitpy.thirdparty.autoinstalled.webpagereplay.replay

from webkitpy.layout_tests.controllers.test_result_writer import TestResultWriter
from webkitpy.layout_tests.port.driver import DriverInput
from webkitpy.layout_tests.port.driver import DriverOutput


_log = logging.getLogger(__name__)


class PerfTestMetric(object):
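    """Collects the values measured for one metric ('Time', 'Malloc', or 'JSHeap')
    across iterations and summarizes them via compute_statistics()."""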
    def __init__(self, metric, unit=None, iterations=None):
        self._metric = metric
        self._iterations = iterations or []
        self._unit = unit or self.metric_to_unit(metric)

    def metric(self):
        return self._metric

    def has_values(self):
        return bool(self._iterations)

    # FIXME: We don't need to support this anymore. Make outputs more human friendly.
    def legacy_chromium_bot_compatible_test_name(self, test_name_with_extension):
        test_name = re.sub(r'\.\w+$', '', test_name_with_extension)
        return test_name if self._metric == 'Time' else test_name + ':' + self._metric

    def append(self, value):
        self._iterations.append(value)

    def to_dict(self):
        assert self.has_values()
        statistics = self.compute_statistics(self._iterations)
        statistics['unit'] = self._unit
        statistics['values'] = self._iterations
        return statistics

    @classmethod
    def metric_to_unit(cls, metric):
        assert metric in ('Time', 'Malloc', 'JSHeap')
        return 'ms' if metric == 'Time' else 'bytes'

    @staticmethod
    def compute_statistics(values):
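        """Returns a dict with the mean ('avg'), 'min', 'max', 'median', and sample
        standard deviation ('stdev') of the given values."""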
        sorted_values = sorted(values)

        # Compute the mean and variance using Knuth's online algorithm (has good numerical stability).
        squareSum = 0
        mean = 0
        for i, value in enumerate(sorted_values):
            delta = value - mean
            sweep = i + 1.0
            mean += delta / sweep
            squareSum += delta * (value - mean)

        middle = int(len(sorted_values) / 2)
        result = {'avg': sum(sorted_values) / len(values),
            'min': sorted_values[0],
            'max': sorted_values[-1],
            'median': sorted_values[middle] if len(sorted_values) % 2 else (sorted_values[middle - 1] + sorted_values[middle]) / 2,
            'stdev': math.sqrt(squareSum / (len(sorted_values) - 1)) if len(sorted_values) > 1 else 0}
        return result


class PerfTest(object):
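    """Represents a single performance test: runs it through a test driver,
    filters and parses the driver's output, and logs the resulting statistics."""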
    def __init__(self, port, test_name, test_path):
        self._port = port
        self._test_name = test_name
        self._test_path = test_path
        self._description = None

    def test_name(self):
        return self._test_name

    def test_path(self):
        return self._test_path

    def description(self):
        return self._description

    def prepare(self, time_out_ms):
        return True

    def _create_driver(self):
        return self._port.create_driver(worker_number=0, no_timeout=True)

    def run(self, time_out_ms):
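        """Runs the test through a fresh driver and returns a dict mapping each metric's
        legacy (Chromium-bot compatible) result name to its statistics dict; returns a
        falsy value if the run failed."""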
        driver = self._create_driver()
        try:
            metrics = self._run_with_driver(driver, time_out_ms)
        finally:
            driver.stop()

        if not metrics:
            return metrics

        results = {}
        for metric in metrics:
            legacy_test_name = metric.legacy_chromium_bot_compatible_test_name(self.test_name())
            results[legacy_test_name] = metric.to_dict()

        if not self._port.get_option('profile'):
            if self._description:
                _log.info('DESCRIPTION: %s' % self._description)
            for result_name in sorted(results.keys()):
                self.output_statistics(result_name, results[result_name])

        return results

    def _run_with_driver(self, driver, time_out_ms):
        output = self.run_single(driver, self.test_path(), time_out_ms)
        self._filter_output(output)
        if self.run_failed(output):
            return None

        return self.parse_output(output)

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        return driver.run_test(DriverInput(test_path, time_out_ms, image_hash=None, should_run_pixel_test=should_run_pixel_test), stop_when_done=False)

    def run_failed(self, output):
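        """Returns True if the run produced no text, produced stderr output, timed out,
        or crashed; logs the failure. Returns False otherwise."""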
        if output.text is None or output.error:
            pass
        elif output.timeout:
            _log.error('timeout: %s' % self.test_name())
        elif output.crash:
            _log.error('crash: %s' % self.test_name())
        else:
            return False

        if output.error:
            _log.error('error: %s\n%s' % (self.test_name(), output.error))

        return True

    def _should_ignore_line(self, regexps, line):
        if not line:
            return True
        for regexp in regexps:
            if regexp.search(line):
                return True
        return False

    _lines_to_ignore_in_stderr = [
        re.compile(r'^Unknown option:'),
        re.compile(r'^\[WARNING:proxy_service.cc'),
        re.compile(r'^\[INFO:'),
    ]

    def _should_ignore_line_in_stderr(self, line):
        return self._should_ignore_line(self._lines_to_ignore_in_stderr, line)

    _lines_to_ignore_in_parser_result = [
        re.compile(r'^Running \d+ times$'),
        re.compile(r'^Ignoring warm-up '),
        re.compile(r'^Info:'),
        re.compile(r'^\d+(\.\d+)?(\s*(runs\/s|ms|fps))?$'),
        # The following are for handling existing tests such as Dromaeo.
        re.compile(re.escape("""main frame - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->-->" - has 1 onunload handler(s)""")),
        re.compile(re.escape("""frame "<!--framePath //<!--frame0-->/<!--frame0-->-->" - has 1 onunload handler(s)""")),
        # The following is for html5.html.
        re.compile(re.escape("""Blocked access to external URL http://www.whatwg.org/specs/web-apps/current-work/""")),
        re.compile(r"CONSOLE MESSAGE: Blocked script execution in '[A-Za-z0-9\-\.]+' because the document's frame is sandboxed and the 'allow-scripts' permission is not set."),
        # Dromaeo reports values for subtests. Ignore them for now.
        re.compile(r'(?P<name>.+): \[(?P<values>(\d+(\.\d+)?,\s+)*\d+(\.\d+)?)\]'),
    ]

    def _should_ignore_line_in_parser_test_result(self, line):
        return self._should_ignore_line(self._lines_to_ignore_in_parser_result, line)

    def _filter_output(self, output):
        if output.error:
            filtered_error = '\n'.join([line for line in re.split('\n', output.error) if not self._should_ignore_line_in_stderr(line)])
            output.error = filtered_error if filtered_error else None
        if output.text:
            output.text = '\n'.join([line for line in re.split('\n', output.text) if not self._should_ignore_line_in_parser_test_result(line)])

    _description_regex = re.compile(r'^Description: (?P<description>.*)$', re.IGNORECASE)
    _metrics_regex = re.compile(r'^(?P<metric>Time|Malloc|JS Heap):')
    _statistics_keys = ['avg', 'median', 'stdev', 'min', 'max', 'unit', 'values']
    _score_regex = re.compile(r'^(?P<key>' + r'|'.join(_statistics_keys) + r')\s+(?P<value>([0-9\.]+(,\s+)?)+)\s*(?P<unit>.*)')

    def parse_output(self, output):
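        """Parses the metrics that a test reports on stdout into PerfTestMetric objects.

        A passing test is expected to print blocks shaped roughly like the following
        (values are illustrative only):

            Time:
            values 1504, 1505, 1510, 1504, 1507 ms
            avg 1506 ms
            median 1505 ms
            stdev 2.5 ms
            min 1504 ms
            max 1510 ms

        Only 'values' lines feed the returned metrics; any unrecognized line is logged
        as an error and makes this method return None."""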
        current_metric = None
        results = []
        for line in re.split('\n', output.text):
            if not line:
                continue

            description_match = self._description_regex.match(line)
            metric_match = self._metrics_regex.match(line)
            score = self._score_regex.match(line)

            if description_match:
                self._description = description_match.group('description')
            elif metric_match:
                current_metric = metric_match.group('metric').replace(' ', '')
            elif score:
                key = score.group('key')
                if key == 'values' and results is not None:
                    values = [float(number) for number in score.group('value').split(', ')]
                    results.append(PerfTestMetric(current_metric, score.group('unit'), values))
            else:
                results = None
                _log.error('ERROR: ' + line)

        return results

    def output_statistics(self, test_name, results):
        unit = results['unit']
        _log.info('RESULT %s= %s %s' % (test_name.replace(':', ': ').replace('/', ': '), results['avg'], unit))
        _log.info(', '.join(['%s= %s %s' % (key, results[key], unit) for key in self._statistics_keys[1:5]]))


class ChromiumStylePerfTest(PerfTest):
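    """Runs tests (e.g. inspector perf tests) that print Chromium-style result lines
    of the form 'RESULT <name> = <value> <unit>'; the lines are logged verbatim and
    collected without computing further statistics."""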
    _chromium_style_result_regex = re.compile(r'^RESULT\s+(?P<name>[^=]+)\s*=\s+(?P<value>\d+(\.\d+)?)\s*(?P<unit>\w+)$')

    def __init__(self, port, test_name, test_path):
        super(ChromiumStylePerfTest, self).__init__(port, test_name, test_path)

    def run(self, time_out_ms):
        driver = self._create_driver()
        try:
            output = self.run_single(driver, self.test_path(), time_out_ms)
        finally:
            driver.stop()

        self._filter_output(output)
        if self.run_failed(output):
            return None

        return self.parse_and_log_output(output)

    def parse_and_log_output(self, output):
        test_failed = False
        results = {}
        for line in re.split('\n', output.text):
            result_line = ChromiumStylePerfTest._chromium_style_result_regex.match(line)
            if result_line:
                # FIXME: Store the unit
                results[self.test_name() + ':' + result_line.group('name').replace(' ', '')] = float(result_line.group('value'))
                _log.info(line)
            elif line:
                test_failed = True
                _log.error(line)
        return results if results and not test_failed else None


class PageLoadingPerfTest(PerfTest):
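    """Measures how long a page takes to load, repeating the load with the first run
    treated as a warm-up; also records Malloc and JSHeap measurements when the driver
    reports them."""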
    _FORCE_GC_FILE = 'resources/force-gc.html'

    def __init__(self, port, test_name, test_path):
        super(PageLoadingPerfTest, self).__init__(port, test_name, test_path)
        self.force_gc_test = self._port.host.filesystem.join(self._port.perf_tests_dir(), self._FORCE_GC_FILE)

    def run_single(self, driver, test_path, time_out_ms, should_run_pixel_test=False):
        # Force GC to prevent pageload noise. See https://bugs.webkit.org/show_bug.cgi?id=98203
        super(PageLoadingPerfTest, self).run_single(driver, self.force_gc_test, time_out_ms, False)
        return super(PageLoadingPerfTest, self).run_single(driver, test_path, time_out_ms, should_run_pixel_test)

    def _run_with_driver(self, driver, time_out_ms):
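        """Loads the page 20 times: the first load is treated as a warm-up and its
        measurements are discarded, so statistics are computed over the remaining 19
        iterations."""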
        times = PerfTestMetric('Time')
        malloc = PerfTestMetric('Malloc')
        js_heap = PerfTestMetric('JSHeap')

        for i in range(0, 20):
            output = self.run_single(driver, self.test_path(), time_out_ms)
            if not output or self.run_failed(output):
                return None
            if i == 0:
                continue

            times.append(output.test_time * 1000)
            if not output.measurements:
                continue

            for metric, result in output.measurements.items():
                assert metric == 'Malloc' or metric == 'JSHeap'
                if metric == 'Malloc':
                    malloc.append(result)
                else:
                    js_heap.append(result)

        return [metric for metric in [times, malloc, js_heap] if metric.has_values()]


class ReplayServer(object):
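    """Runs the auto-installed web-page-replay tool as a subprocess, serving the given
    archive (or recording into it when record=True) on localhost ports 8080 and 8443."""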
    def __init__(self, archive, record):
        self._process = None

        # FIXME: Should error if local proxy isn't set to forward requests to localhost:8080 and localhost:8443

        replay_path = webkitpy.thirdparty.autoinstalled.webpagereplay.replay.__file__
        args = ['python', replay_path, '--no-dns_forwarding', '--port', '8080', '--ssl_port', '8443', '--use_closest_match', '--log_level', 'warning']
        if record:
            args.append('--record')
        args.append(archive)

        self._process = subprocess.Popen(args)

    def wait_until_ready(self):
        for i in range(0, 3):
            try:
                connection = socket.create_connection(('localhost', '8080'), timeout=1)
                connection.close()
                return True
            except socket.error:
                time.sleep(1)
                continue
        return False

    def stop(self):
        if self._process:
            self._process.send_signal(signal.SIGINT)
            self._process.wait()
        self._process = None

    def __del__(self):
        self.stop()


class ReplayPerfTest(PageLoadingPerfTest):
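    """A page-loading test driven by a web-page-replay archive. The first line of the
    .replay test file is the URL to load; prepare() records a .wpr archive and an
    expected image next to the test if they do not already exist."""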
    def __init__(self, port, test_name, test_path):
        super(ReplayPerfTest, self).__init__(port, test_name, test_path)

    def _start_replay_server(self, archive, record):
        try:
            return ReplayServer(archive, record)
        except OSError as error:
            if error.errno == errno.ENOENT:
                _log.error("Replay tests require web-page-replay.")
            else:
                raise error

    def prepare(self, time_out_ms):
        filesystem = self._port.host.filesystem
        path_without_ext = filesystem.splitext(self.test_path())[0]

        self._archive_path = filesystem.join(path_without_ext + '.wpr')
        self._expected_image_path = filesystem.join(path_without_ext + '-expected.png')
        self._url = filesystem.read_text_file(self.test_path()).split('\n')[0]

        if filesystem.isfile(self._archive_path) and filesystem.isfile(self._expected_image_path):
            _log.info("Replay ready for %s" % self._archive_path)
            return True

        _log.info("Preparing replay for %s" % self.test_name())

        driver = self._port.create_driver(worker_number=0, no_timeout=True)
        try:
            output = self.run_single(driver, self._archive_path, time_out_ms, record=True)
        finally:
            driver.stop()

        if not output or not filesystem.isfile(self._archive_path):
            _log.error("Failed to prepare a replay for %s" % self.test_name())
            return False

        _log.info("Prepared replay for %s" % self.test_name())

        return True

    def run_single(self, driver, url, time_out_ms, record=False):
        server = self._start_replay_server(self._archive_path, record)
        if not server:
            _log.error("Web page replay didn't start.")
            return None

        try:
            _log.debug("Waiting for Web page replay to start.")
            if not server.wait_until_ready():
                _log.error("Web page replay didn't start.")
                return None

            _log.debug("Web page replay started. Loading the page.")
            output = super(ReplayPerfTest, self).run_single(driver, self._url, time_out_ms, should_run_pixel_test=True)
            if self.run_failed(output):
                return None

            if not output.image:
                _log.error("Loading the page did not generate image results")
                _log.error(output.text)
                return None

            filesystem = self._port.host.filesystem
            dirname = filesystem.dirname(self._archive_path)
            filename = filesystem.split(self._archive_path)[1]
            writer = TestResultWriter(filesystem, self._port, dirname, filename)
            if record:
                writer.write_image_files(actual_image=None, expected_image=output.image)
            else:
                writer.write_image_files(actual_image=output.image, expected_image=None)

            return output
        finally:
            server.stop()


class PerfTestFactory(object):
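    """Creates the appropriate PerfTest subclass for a test: tests under inspector/
    are Chromium-style tests, *.replay tests are replay tests, and everything else is
    a plain PerfTest."""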
    _pattern_map = [
        (re.compile(r'^inspector/'), ChromiumStylePerfTest),
        (re.compile(r'(.+)\.replay$'), ReplayPerfTest),
    ]

    @classmethod
    def create_perf_test(cls, port, test_name, path):
        for (pattern, test_class) in cls._pattern_map:
            if pattern.match(test_name):
                return test_class(port, test_name, path)
        return PerfTest(port, test_name, path)