Unreviewed. Update W3C WebDriver imported tests.
[WebKit-https.git] / WebDriverTests / imported / w3c / tools / wptrunner / wptrunner / testloader.py
1 import hashlib
2 import json
3 import os
4 import urlparse
5 from abc import ABCMeta, abstractmethod
6 from Queue import Empty
7 from collections import defaultdict, OrderedDict, deque
8 from multiprocessing import Queue
9
10 import manifestinclude
11 import manifestexpected
12 import wpttest
13 from mozlog import structured
14
# Placeholders for the names that do_delayed_imports() binds lazily; each one
# stays None until that function has been called.
manifest = manifest_update = download_from_github = manifest_log = None
19
def do_delayed_imports():
    """Bind the manifest helpers to this module's global names.

    The imports are deferred to call time because they only resolve once an
    already loaded module has set sys.path up correctly :(
    """
    global manifest, manifest_update, download_from_github, manifest_log
    from manifest import manifest, update as manifest_update
    from manifest.download import download_from_github
    from manifest import log as manifest_log
27
28
class TestChunker(object):
    """Common state for the chunker callables defined in this module."""

    def __init__(self, total_chunks, chunk_number):
        """Store the chunk layout and pick up the default structured logger.

        :param total_chunks: number of chunks the tests are divided into
        :param chunk_number: the chunk this instance selects; asserted to be
                             no greater than total_chunks
        """
        self.total_chunks, self.chunk_number = total_chunks, chunk_number
        assert self.chunk_number <= self.total_chunks
        default_logger = structured.get_default_logger()
        self.logger = default_logger
        assert self.logger

    def __call__(self, manifest):
        """Chunk the given manifest; not implemented on the base class."""
        raise NotImplementedError()
39
40
class Unchunked(TestChunker):
    """Chunker for the single-chunk case: every manifest item is selected."""

    def __init__(self, *args, **kwargs):
        """Initialise through TestChunker and insist on exactly one chunk."""
        TestChunker.__init__(self, *args, **kwargs)
        assert self.total_chunks == 1

    def __call__(self, manifest):
        """Yield each item of the manifest unchanged, in iteration order."""
        for entry in manifest:
            yield entry
49
50
class HashChunker(TestChunker):
    """Chunker that assigns each test path to a chunk via the MD5 of the path."""

    def __call__(self, manifest):
        """Yield the manifest items whose path hashes into this chunk.

        :param manifest: iterable of (test_type, test_path, tests) tuples
        """
        chunk_index = self.chunk_number - 1
        for test_type, test_path, tests in manifest:
            # hashlib only accepts bytes. Byte-string paths are hashed as they
            # always were; text paths are encoded as UTF-8, which matches the
            # old implicit encoding for ASCII and no longer fails otherwise.
            if isinstance(test_path, bytes):
                key = test_path
            else:
                key = test_path.encode("utf-8")
            h = int(hashlib.md5(key).hexdigest(), 16)
            if h % self.total_chunks == chunk_index:
                yield test_type, test_path, tests
58
59
class DirectoryHashChunker(TestChunker):
    """Like HashChunker except the directory is hashed.

    This ensures that all tests in the same directory end up in the same
    chunk.
    """
    def __call__(self, manifest):
        """Yield the manifest items whose directory hashes into this chunk.

        :param manifest: iterable of (test_type, test_path, tests) tuples
        """
        chunk_index = self.chunk_number - 1
        for test_type, test_path, tests in manifest:
            directory = os.path.dirname(test_path)
            # hashlib only accepts bytes. Byte-string paths are hashed as they
            # always were; text paths are encoded as UTF-8, which matches the
            # old implicit encoding for ASCII and no longer fails otherwise.
            if not isinstance(directory, bytes):
                directory = directory.encode("utf-8")
            h = int(hashlib.md5(directory).hexdigest(), 16)
            if h % self.total_chunks == chunk_index:
                yield test_type, test_path, tests
72
73
class EqualTimeChunker(TestChunker):
    """Chunker that tries to give every chunk the same total timeout.

    Manifest items are grouped by directory, the groups are cut into
    contiguous chunks, and groups are then moved between neighbouring chunks
    for as long as a move lowers the summed squared deviation from the mean
    chunk time.
    """

    def _group_by_directory(self, manifest_items):
        """Split the list of manifest items into a ordered dict that groups tests in
        so that anything in the same subdirectory beyond a depth of 3 is in the same
        group. So all tests in a/b/c, a/b/c/d and a/b/c/e will be grouped together
        and separate to tests in a/b/f

        :param manifest_items: iterable of (test_type, test_path, tests) tuples

        Returns: tuple (ordered dict of {test_dir: PathData}, total estimated runtime)
        """

        class PathData(object):
            def __init__(self, path):
                self.path = path
                self.time = 0
                self.tests = []

        by_dir = OrderedDict()
        total_time = 0

        for test_type, test_path, tests in manifest_items:
            # Key on at most the first three components of the directory.
            test_dir = tuple(os.path.split(test_path)[0].split(os.path.sep)[:3])

            if test_dir not in by_dir:
                by_dir[test_dir] = PathData(test_dir)

            data = by_dir[test_dir]
            # The estimate for an item is the sum of its tests' timeouts.
            time = sum(test.default_timeout if test.timeout !=
                       "long" else test.long_timeout for test in tests)
            data.time += time
            total_time += time
            data.tests.append((test_type, test_path, tests))

        return by_dir, total_time

    def _maybe_remove(self, chunks, i, direction):
        """Trial removing a chunk from one chunk to an adjacent one.

        :param chunks: - the list of all chunks
        :param i: - the chunk index in the list of chunks to try removing from
        :param direction: either "next" if we are going to move from the end to
                          the subsequent chunk, or "prev" if we are going to move
                          from the start into the previous chunk.

        :returns bool: Did a chunk get moved?"""
        source_chunk = chunks[i]
        if direction == "next":
            target_chunk = chunks[i+1]
            path_index = -1
            move_func = lambda: target_chunk.appendleft(source_chunk.pop())
        elif direction == "prev":
            target_chunk = chunks[i-1]
            path_index = 0
            move_func = lambda: target_chunk.append(source_chunk.popleft())
        else:
            raise ValueError("Unexpected move direction %s" % direction)

        return self._maybe_move(source_chunk, target_chunk, path_index, move_func)

    def _maybe_add(self, chunks, i, direction):
        """Trial adding a chunk from one chunk to an adjacent one.

        :param chunks: - the list of all chunks
        :param i: - the chunk index in the list of chunks to try adding to
        :param direction: either "next" if we are going to remove from the
                          subsequent chunk, or "prev" if we are going to remove
                          from the previous chunk.

        :returns bool: Did a chunk get moved?"""
        target_chunk = chunks[i]
        if direction == "next":
            source_chunk = chunks[i+1]
            path_index = 0
            move_func = lambda: target_chunk.append(source_chunk.popleft())
        elif direction == "prev":
            source_chunk = chunks[i-1]
            path_index = -1
            move_func = lambda: target_chunk.appendleft(source_chunk.pop())
        else:
            raise ValueError("Unexpected move direction %s" % direction)

        return self._maybe_move(source_chunk, target_chunk, path_index, move_func)

    def _maybe_move(self, source_chunk, target_chunk, path_index, move_func):
        """Move from one chunk to another, assess the change in badness,
        and keep the move iff it decreases the badness score.

        :param source_chunk: chunk to move from
        :param target_chunk: chunk to move to
        :param path_index: 0 if we are moving from the start or -1 if we are moving from the
                           end
        :param move_func: Function that actually moves between chunks

        :returns bool: True if move_func was called, False otherwise"""
        # Never empty a chunk: its last remaining path stays where it is.
        if len(source_chunk.paths) <= 1:
            return False

        move_time = source_chunk.paths[path_index].time

        new_source_badness = self._badness(source_chunk.time - move_time)
        new_target_badness = self._badness(target_chunk.time + move_time)

        delta_badness = ((new_source_badness + new_target_badness) -
                         (source_chunk.badness + target_chunk.badness))
        # Only a strict improvement is accepted.
        if delta_badness < 0:
            move_func()
            return True

        return False

    def _badness(self, time):
        """Metric of badness for a specific chunk

        :param time: the time for a specific chunk
        :returns: squared difference between time and self.expected_time"""
        return (time - self.expected_time)**2

    def _get_chunk(self, manifest_items):
        """Balance the manifest items into chunks and return this run's share.

        :param manifest_items: list of (test_type, test_path, tests) tuples
        :returns list: the (test_type, test_path, tests) tuples of the chunk
                       selected by self.chunk_number
        :raises ValueError: if there are fewer directory groups than chunks"""
        by_dir, total_time = self._group_by_directory(manifest_items)

        if len(by_dir) < self.total_chunks:
            raise ValueError("Tried to split into %i chunks, but only %i subdirectories included" % (
                self.total_chunks, len(by_dir)))

        self.expected_time = float(total_time) / self.total_chunks

        chunks = self._create_initial_chunks(by_dir)

        while True:
            # Move a test from one chunk to the next until doing so no longer
            # reduces the badness
            got_improvement = self._update_chunks(chunks)
            if not got_improvement:
                break

        self.logger.debug(self.expected_time)
        for i, chunk in chunks.items():
            self.logger.debug("%i: %i, %i" % (i + 1, chunk.time, chunk.badness))

        assert self._all_tests(by_dir) == self._chunked_tests(chunks)

        return self._get_tests(chunks)

    @staticmethod
    def _all_tests(by_dir):
        """Return the set of (test_type, test_path) pairs in a grouping by directory"""
        # Each entry is (test_type, test_path, tests); keying on the type alone
        # would make the comparison with _chunked_tests almost always succeed.
        return set(x[:2] for item in by_dir.values()
                   for x in item.tests)

    @staticmethod
    def _chunked_tests(chunks):
        """Return the set of (test_type, test_path) pairs held by the chunks"""
        return set(x[:2] for chunk in chunks.values()
                   for path in chunk.paths
                   for x in path.tests)

    def _create_initial_chunks(self, by_dir):
        """Create an initial unbalanced set of chunks.

        :param by_dir: All tests in the manifest grouped by subdirectory
        :returns: An ordered dict mapping chunk index to Chunk object"""

        class Chunk(object):
            def __init__(self, paths, index):
                """List of PathData objects that together form a single chunk of
                tests"""
                self.paths = deque(paths)
                self.time = sum(item.time for item in paths)
                self.index = index

            def appendleft(self, path):
                """Add a PathData object to the start of the chunk"""
                self.paths.appendleft(path)
                self.time += path.time

            def append(self, path):
                """Add a PathData object to the end of the chunk"""
                self.paths.append(path)
                self.time += path.time

            def pop(self):
                """Remove PathData object from the end of the chunk"""
                assert len(self.paths) > 1
                self.time -= self.paths[-1].time
                return self.paths.pop()

            def popleft(self):
                """Remove PathData object from the start of the chunk"""
                assert len(self.paths) > 1
                self.time -= self.paths[0].time
                return self.paths.popleft()

            @property
            def badness(self_):
                """Badness metric for this chunk"""
                # self here is the enclosing EqualTimeChunker, self_ the Chunk.
                return self._badness(self_.time)

        # Equal-sized contiguous slices; the last chunk absorbs any remainder.
        initial_size = len(by_dir) // self.total_chunks
        chunk_boundaries = [initial_size * i
                            for i in range(self.total_chunks)] + [len(by_dir)]

        # Materialise the groups once rather than on every loop iteration.
        all_paths = list(by_dir.values())

        chunks = OrderedDict()
        for i, lower in enumerate(chunk_boundaries[:-1]):
            upper = chunk_boundaries[i + 1]
            chunks[i] = Chunk(all_paths[lower:upper], i)

        assert self._all_tests(by_dir) == self._chunked_tests(chunks)

        return chunks

    def _update_chunks(self, chunks):
        """Run a single iteration of the chunk update algorithm.

        :param chunks: - Ordered dict of chunk index to chunk
        :returns bool: True if a path was moved between two chunks
        """
        # A lone chunk has no neighbour to trade with; without this guard the
        # lookup of chunk 1 below raises KeyError when total_chunks is 1.
        if len(chunks) < 2:
            return False

        #TODO: consider replacing this with a heap
        sorted_chunks = sorted(chunks.values(), key=lambda x:-x.badness)
        got_improvement = False
        for chunk in sorted_chunks:
            if chunk.time < self.expected_time:
                f = self._maybe_add
            else:
                f = self._maybe_remove

            if chunk.index == 0:
                order = ["next"]
            elif chunk.index == self.total_chunks - 1:
                order = ["prev"]
            else:
                if chunk.time < self.expected_time:
                    # First try to add a test from the neighboring chunk with the
                    # greatest total time
                    if chunks[chunk.index + 1].time > chunks[chunk.index - 1].time:
                        order = ["next", "prev"]
                    else:
                        order = ["prev", "next"]
                else:
                    # First try to remove a test and add to the neighboring chunk with the
                    # lowest total time
                    if chunks[chunk.index + 1].time > chunks[chunk.index - 1].time:
                        order = ["prev", "next"]
                    else:
                        order = ["next", "prev"]

            for direction in order:
                if f(chunks, chunk.index, direction):
                    got_improvement = True
                    break

            if got_improvement:
                break

        return got_improvement

    def _get_tests(self, chunks):
        """Return the list of tests corresponding to the chunk number we are running.

        :param chunks: Ordered dict of chunk index to chunk"""
        tests = []
        for path in chunks[self.chunk_number - 1].paths:
            tests.extend(path.tests)

        return tests

    def __call__(self, manifest_iter):
        """Yield the manifest items that belong to this run's chunk.

        :param manifest_iter: iterable of (test_type, test_path, tests) tuples
        """
        manifest = list(manifest_iter)
        tests = self._get_chunk(manifest)
        for item in tests:
            yield item
341
342
class TestFilter(object):
    """Manifest filter that keeps only the tests its include manifest accepts."""

    def __init__(self, test_manifests, include=None, exclude=None, manifest_path=None):
        """Build the include manifest that drives the filter.

        :param test_manifests: passed through to add_include / add_exclude
        :param include: optional iterable of items to include; when non-empty
                        the manifest's "skip" key is set to "true" first
        :param exclude: optional iterable of items to exclude
        :param manifest_path: path handed to manifestinclude.get_manifest;
                              only used when include is None
        """
        load_from_file = manifest_path is not None and include is None
        if load_from_file:
            self.manifest = manifestinclude.get_manifest(manifest_path)
        else:
            self.manifest = manifestinclude.IncludeManifest.create()
            self.manifest.set_defaults()

        if include:
            self.manifest.set("skip", "true")
            for wanted in include:
                self.manifest.add_include(test_manifests, wanted)

        for unwanted in exclude or ():
            self.manifest.add_exclude(test_manifests, unwanted)

    def __call__(self, manifest_iter):
        """Yield (test_type, test_path, tests) with tests cut down to a set of
        the included ones; items left with no tests are dropped.

        :param manifest_iter: iterable of (test_type, test_path, tests) tuples
        """
        for test_type, test_path, tests in manifest_iter:
            include_tests = set(candidate for candidate in tests
                                if self.manifest.include(candidate))
            if not include_tests:
                continue
            yield test_type, test_path, include_tests
369
class TagFilter(object):
    """Filter that passes only tests sharing at least one tag with the filter."""

    def __init__(self, tags):
        """:param tags: iterable of tags; stored as a set"""
        self.tags = set(tags)

    def __call__(self, test_iter):
        """Yield each test whose tags attribute intersects self.tags.

        :param test_iter: iterable of objects with a tags attribute
        """
        for candidate in test_iter:
            if not (candidate.tags & self.tags):
                continue
            yield candidate
378
class ManifestLoader(object):
    """Loads, and where needed creates or refreshes, the MANIFEST.json files
    for a set of test paths."""

    def __init__(self, test_paths, force_manifest_update=False, manifest_download=False):
        """
        :param test_paths: mapping of url_base to a dict of keyword arguments
                           for load_manifest
        :param force_manifest_update: update the manifest even if the file
                                      already exists
        :param manifest_download: passed as the download flag when updating
        """
        do_delayed_imports()
        self.test_paths = test_paths
        self.force_manifest_update = force_manifest_update
        self.manifest_download = manifest_download
        logger = structured.get_default_logger()
        self.logger = logger
        if self.logger is None:
            self.logger = structured.structuredlog.StructuredLogger("ManifestLoader")

    def load(self):
        """Load the manifest for every entry of self.test_paths.

        :returns dict: maps each loaded manifest to its path data, i.e. the
                       entry's dict plus a "url_base" key
        """
        loaded = {}
        for url_base, paths in self.test_paths.iteritems():
            manifest_file = self.load_manifest(url_base=url_base, **paths)
            path_data = {"url_base": url_base}
            path_data.update(paths)
            loaded[manifest_file] = path_data
        return loaded

    def create_manifest(self, manifest_path, tests_path, url_base="/"):
        """Rebuild the manifest from scratch via update_manifest(recreate=True)."""
        self.update_manifest(manifest_path,
                             tests_path,
                             url_base,
                             recreate=True,
                             download=self.manifest_download)

    def update_manifest(self, manifest_path, tests_path, url_base="/",
                        recreate=False, download=False):
        """Update the manifest for tests_path and write it to manifest_path.

        :param manifest_path: path of the manifest JSON file
        :param tests_path: passed to the manifest update / from_json helpers
        :param url_base: url_base for a manifest that is created fresh
        :param recreate: when true, the existing file is not read
        :param download: when true, call download_from_github first
        """
        self.logger.info("Updating test manifest %s" % manifest_path)
        manifest_log.setup()

        existing_json = None
        if download:
            # TODO: make this not github-specific
            download_from_github(manifest_path, tests_path)

        if not recreate:
            try:
                with open(manifest_path) as manifest_fp:
                    existing_json = json.load(manifest_fp)
            except IOError:
                # If the existing file can't be opened just create one from scratch
                pass

        if existing_json:
            try:
                manifest_file = manifest.Manifest.from_json(tests_path, existing_json)
            except manifest.ManifestVersionMismatch:
                # Start over with an empty manifest on a version mismatch
                manifest_file = manifest.Manifest(url_base)
        else:
            manifest_file = manifest.Manifest(url_base)

        manifest_update.update(tests_path, manifest_file, True)

        manifest.write(manifest_file, manifest_path)

    def load_manifest(self, tests_path, metadata_path, url_base="/"):
        """Load MANIFEST.json from metadata_path, updating it first when it is
        missing or an update is forced.

        If the loaded manifest has a different url_base it is changed to the
        given one and the manifest is written back.

        :returns: the loaded manifest object
        """
        manifest_path = os.path.join(metadata_path, "MANIFEST.json")
        needs_update = not os.path.exists(manifest_path) or self.force_manifest_update
        if needs_update:
            self.update_manifest(manifest_path, tests_path, url_base,
                                 download=self.manifest_download)
        manifest_file = manifest.load(tests_path, manifest_path)
        if manifest_file.url_base != url_base:
            self.logger.info("Updating url_base in manifest from %s to %s" %
                             (manifest_file.url_base, url_base))
            manifest_file.url_base = url_base
            manifest.write(manifest_file, manifest_path)

        return manifest_file
446
def iterfilter(filters, iter):
    """Chain the filters over the iterable and yield what comes out.

    Each filter is called with the output of the previous one (the first with
    iter itself). Nothing is applied until the generator is first advanced.
    """
    filtered = iter
    for apply_filter in filters:
        filtered = apply_filter(filtered)
    for element in filtered:
        yield element
452
class TestLoader(object):
    """Reads tests out of the manifests, applies filters and chunking, and
    sorts the resulting tests into enabled and disabled sets by test type."""

    def __init__(self,
                 test_manifests,
                 test_types,
                 run_info,
                 manifest_filters=None,
                 meta_filters=None,
                 chunk_type="none",
                 total_chunks=1,
                 chunk_number=1,
                 include_https=True,
                 skip_timeout=False):
        """Store the configuration, build the chunker and load the tests.

        :param test_manifests: mapping of manifest object to its path data
        :param test_types: test types to load
        :param run_info: passed to the expectation-metadata loaders
        :param manifest_filters: filters applied to the raw manifest items
        :param meta_filters: filters applied to the constructed tests
        :param chunk_type: one of "none", "hash", "dir_hash", "equal_time"
        :param total_chunks: passed to the chunker
        :param chunk_number: passed to the chunker
        :param include_https: when false, https tests are treated as disabled
        :param skip_timeout: when true, tests expected to TIMEOUT are treated
                             as disabled
        """
        self.test_types = test_types
        self.run_info = run_info

        self.manifest_filters = [] if manifest_filters is None else manifest_filters
        self.meta_filters = [] if meta_filters is None else meta_filters

        self.manifests = test_manifests
        self.tests = None
        self.disabled_tests = None
        self.include_https = include_https
        self.skip_timeout = skip_timeout

        self.chunk_type = chunk_type
        self.total_chunks = total_chunks
        self.chunk_number = chunk_number

        chunker_classes = {"none": Unchunked,
                           "hash": HashChunker,
                           "dir_hash": DirectoryHashChunker,
                           "equal_time": EqualTimeChunker}
        self.chunker = chunker_classes[chunk_type](total_chunks, chunk_number)

        self._test_ids = None

        self.directory_manifests = {}

        self._load_tests()

    @property
    def test_ids(self):
        """Ids of all loaded tests, disabled ones first; computed once and cached."""
        if self._test_ids is not None:
            return self._test_ids

        self._test_ids = []
        for tests_by_type in (self.disabled_tests, self.tests):
            for test_type in self.test_types:
                self._test_ids += [loaded.id for loaded in tests_by_type[test_type]]
        return self._test_ids

    def get_test(self, manifest_test, inherit_metadata, test_metadata):
        """Build the wpttest object for one manifest test.

        When test_metadata is given it is appended to inherit_metadata (the
        list is modified in place) and narrowed to the entry for this test id.
        """
        per_test_metadata = test_metadata
        if test_metadata is not None:
            inherit_metadata.append(test_metadata)
            per_test_metadata = test_metadata.get_test(manifest_test.id)

        return wpttest.from_manifest(manifest_test, inherit_metadata, per_test_metadata)

    def load_dir_metadata(self, test_manifest, metadata_path, test_path):
        """Collect the __dir__.ini manifests for every ancestor directory of
        test_path, outermost first.

        Lookups are cached in self.directory_manifests; entries for which
        get_dir_manifest gave None are left out of the result.
        """
        found = []
        cache = self.directory_manifests
        dir_parts = os.path.dirname(test_path).split(os.path.sep)
        for depth in xrange(1, len(dir_parts) + 1):
            ini_path = os.path.join(metadata_path,
                                    os.path.sep.join(dir_parts[:depth]),
                                    "__dir__.ini")
            try:
                dir_manifest = cache[ini_path]
            except KeyError:
                dir_manifest = cache[ini_path] = manifestexpected.get_dir_manifest(
                    ini_path, self.run_info)
            if dir_manifest is not None:
                found.append(dir_manifest)
        return found

    def load_metadata(self, test_manifest, metadata_path, test_path):
        """Return (directory metadata list, test file metadata) for test_path."""
        inherit_metadata = self.load_dir_metadata(test_manifest, metadata_path, test_path)
        test_metadata = manifestexpected.get_manifest(metadata_path,
                                                      test_path,
                                                      test_manifest.url_base,
                                                      self.run_info)
        return inherit_metadata, test_metadata

    def iter_tests(self):
        """Yield (test_path, test_type, test) for every test that survives the
        manifest filters, the chunker and the meta filters.

        Manifests are visited in order of their url_base.
        """
        manifest_items = []

        for test_manifest in sorted(self.manifests, key=lambda m: m.url_base):
            filtered_items = iterfilter(self.manifest_filters,
                                        test_manifest.itertypes(*self.test_types))
            manifest_items.extend(filtered_items)

        if self.chunker is not None:
            manifest_items = self.chunker(manifest_items)

        for test_type, test_path, tests in manifest_items:
            # Any test of the item will do for locating the owning manifest.
            manifest_file = next(iter(tests)).manifest
            metadata_path = self.manifests[manifest_file]["metadata_path"]
            inherit_metadata, test_metadata = self.load_metadata(
                manifest_file, metadata_path, test_path)

            wpttests = self.iter_wpttest(inherit_metadata, test_metadata, tests)
            for test in iterfilter(self.meta_filters, wpttests):
                yield test_path, test_type, test

    def iter_wpttest(self, inherit_metadata, test_metadata, tests):
        """Yield the wpttest object for each manifest test in tests."""
        for manifest_test in tests:
            yield self.get_test(manifest_test, inherit_metadata, test_metadata)

    def _load_tests(self):
        """Read in the tests from the manifest file and sort them into
        self.tests (enabled) and self.disabled_tests, keyed by test type"""
        enabled_tests = defaultdict(list)
        disabled_tests = defaultdict(list)

        for test_path, test_type, test in self.iter_tests():
            enabled = not test.disabled()
            if not self.include_https and test.environment["protocol"] == "https":
                enabled = False
            if self.skip_timeout and test.expected() == "TIMEOUT":
                enabled = False
            bucket = enabled_tests if enabled else disabled_tests
            bucket[test_type].append(test)

        self.tests = enabled_tests
        self.disabled_tests = disabled_tests

    def groups(self, test_types, chunk_type="none", total_chunks=1, chunk_number=1):
        """Return the set of second "/"-separated components of the urls of
        the enabled tests of the given types.

        The chunking arguments are accepted but not used.
        """
        return set(test.url.split("/")[1]
                   for test_type in test_types
                   for test in self.tests[test_type])
579
580
class TestSource(object):
    """Abstract source handing out groups of tests taken from a queue."""
    __metaclass__ = ABCMeta

    def __init__(self, test_queue):
        """:param test_queue: queue of (group, metadata) pairs"""
        self.test_queue = test_queue
        self.current_group = None
        self.current_metadata = None

    @abstractmethod
    #@classmethod (doesn't compose with @abstractmethod)
    def make_queue(cls, tests, **kwargs):
        """Build the queue of test groups; implemented by subclasses."""
        pass

    def group(self):
        """Return the current (group, metadata) pair.

        A new pair is fetched from the queue, without blocking, whenever the
        current group is unset or empty; (None, None) is returned if the
        queue has nothing to give.
        """
        has_pending = self.current_group and len(self.current_group) != 0
        if not has_pending:
            try:
                fetched = self.test_queue.get(block=False)
            except Empty:
                return None, None
            else:
                self.current_group, self.current_metadata = fetched
        return self.current_group, self.current_metadata
601
602
class GroupedSource(TestSource):
    """Test source whose queue holds runs of consecutive tests, split wherever
    new_group says a new run starts."""

    @classmethod
    def new_group(cls, state, test, **kwargs):
        """Say whether test starts a new group; implemented by subclasses.

        :param state: dict shared across all calls made by one make_queue run
        """
        raise NotImplementedError

    @classmethod
    def make_queue(cls, tests, **kwargs):
        """Return a Queue of (deque of tests, metadata dict) pairs.

        Each test joins the most recently opened group and is asked to update
        that group's metadata dict.
        """
        test_queue = Queue()
        pending = []
        grouping_state = {}

        for test in tests:
            if cls.new_group(grouping_state, test, **kwargs):
                pending.append((deque(), {}))

            members, metadata = pending[-1]
            members.append(test)
            test.update_metadata(metadata)

        for entry in pending:
            test_queue.put(entry)
        return test_queue
626
627
class SingleTestSource(TestSource):
    """Test source that spreads tests over one group per process by hashing
    the test id."""

    @classmethod
    def make_queue(cls, tests, **kwargs):
        """Return a Queue with one (deque of tests, metadata dict) pair per process.

        :param tests: iterable of tests; each needs an id and update_metadata
        :param kwargs: must contain "processes", the number of groups
        """
        test_queue = Queue()
        processes = kwargs["processes"]
        buckets = [(deque([]), {}) for _ in xrange(processes)]
        for test in tests:
            members, metadata = buckets[hash(test.id) % processes]
            members.append(test)
            test.update_metadata(metadata)

        for bucket in buckets:
            test_queue.put(bucket)

        return test_queue
646
647
class PathGroupedSource(GroupedSource):
    """Grouped source that opens a new group whenever the directory part of
    the test url changes."""

    @classmethod
    def new_group(cls, state, test, **kwargs):
        """Return True if test's url directory differs from the previous test's.

        The directory components are truncated to kwargs["depth"] entries;
        a depth of True or None means no truncation. The components are
        remembered in state["prev_path"] for the next call.
        """
        limit = kwargs.get("depth")
        if limit is True:
            limit = None
        url_path = urlparse.urlsplit(test.url).path
        # Drop the empty leading component and the file name.
        directories = url_path.split("/")[1:-1]
        path = directories[:limit]
        previous = state.get("prev_path")
        state["prev_path"] = path
        return path != previous