d244045acb484afbc2878f21cf97f7348afaa4c5
[WebKit-https.git] / Tools / Scripts / webkitpy / common / checkout / baselineoptimizer.py
1 # Copyright (C) 2011, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29
30 # Yes, it's a hypergraph.
31 # FIXME: Should this function live with the ports somewhere?
32 # Perhaps this should move onto PortFactory?
33 def _baseline_search_hypergraph(host):
34     hypergraph = {}
35
36     # These edges in the hypergraph aren't visible on build.webkit.org,
37     # but they impose constraints on how we optimize baselines.
38     hypergraph.update(_VIRTUAL_PORTS)
39
40     # FIXME: Should we get this constant from somewhere?
41     fallback_path = ['LayoutTests']
42
43     port_factory = host.port_factory
44     for port_name in port_factory.all_port_names():
45         port = port_factory.get(port_name)
46         webkit_base = port.webkit_base()
47         search_path = port.baseline_search_path()
48         if search_path:
49             hypergraph[port_name] = [host.filesystem.relpath(path, webkit_base) for path in search_path] + fallback_path
50     return hypergraph
51
52
53 _VIRTUAL_PORTS = {
54     'mac-future': ['LayoutTests/platform/mac-future', 'LayoutTests/platform/mac', 'LayoutTests'],
55     'win-future': ['LayoutTests/platform/win-future', 'LayoutTests/platform/win', 'LayoutTests'],
56     'qt-unknown': ['LayoutTests/platform/qt-unknown', 'LayoutTests/platform/qt', 'LayoutTests'],
57 }
58
59
60 # FIXME: Should this function be somewhere more general?
61 def _invert_dictionary(dictionary):
62     inverted_dictionary = {}
63     for key, value in dictionary.items():
64         if inverted_dictionary.get(value):
65             inverted_dictionary[value].append(key)
66         else:
67             inverted_dictionary[value] = [key]
68     return inverted_dictionary
69
70
71 class BaselineOptimizer(object):
72     def __init__(self, host):
73         self._host = host
74         self._filesystem = self._host.filesystem
75         self._scm = self._host.scm()
76         self._hypergraph = _baseline_search_hypergraph(host)
77         self._directories = reduce(set.union, map(set, self._hypergraph.values()))
78
79     def _read_results_by_directory(self, baseline_name):
80         results_by_directory = {}
81         for directory in self._directories:
82             path = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
83             if self._filesystem.exists(path):
84                 results_by_directory[directory] = self._filesystem.sha1(path)
85         return results_by_directory
86
87     def _results_by_port_name(self, results_by_directory):
88         results_by_port_name = {}
89         for port_name, search_path in self._hypergraph.items():
90             for directory in search_path:
91                 if directory in results_by_directory:
92                     results_by_port_name[port_name] = results_by_directory[directory]
93                     break
94         return results_by_port_name
95
96     def _most_specific_common_directory(self, port_names):
97         paths = [self._hypergraph[port_name] for port_name in port_names]
98         common_directories = reduce(set.intersection, map(set, paths))
99
100         def score(directory):
101             return sum([path.index(directory) for path in paths])
102
103         _, directory = sorted([(score(directory), directory) for directory in common_directories])[0]
104         return directory
105
106     def _filter_port_names_by_result(self, predicate, port_names_by_result):
107         filtered_port_names_by_result = {}
108         for result, port_names in port_names_by_result.items():
109             filtered_port_names = filter(predicate, port_names)
110             if filtered_port_names:
111                 filtered_port_names_by_result[result] = filtered_port_names
112         return filtered_port_names_by_result
113
114     def _place_results_in_most_specific_common_directory(self, port_names_by_result, results_by_directory):
115         for result, port_names in port_names_by_result.items():
116             directory = self._most_specific_common_directory(port_names)
117             results_by_directory[directory] = result
118
119     def _find_optimal_result_placement(self, baseline_name):
120         results_by_directory = self._read_results_by_directory(baseline_name)
121         results_by_port_name = self._results_by_port_name(results_by_directory)
122         port_names_by_result = _invert_dictionary(results_by_port_name)
123
124         new_results_by_directory = {}
125         unsatisfied_port_names_by_result = port_names_by_result
126         while unsatisfied_port_names_by_result:
127             self._place_results_in_most_specific_common_directory(unsatisfied_port_names_by_result, new_results_by_directory)
128             new_results_by_port_name = self._results_by_port_name(new_results_by_directory)
129
130             def is_unsatisfied(port_name):
131                 return results_by_port_name[port_name] != new_results_by_port_name[port_name]
132
133             new_unsatisfied_port_names_by_result = self._filter_port_names_by_result(is_unsatisfied, port_names_by_result)
134
135             if len(new_unsatisfied_port_names_by_result.values()) >= len(unsatisfied_port_names_by_result.values()):
136                 break  # Frowns. We do not appear to be converging.
137             unsatisfied_port_names_by_result = new_unsatisfied_port_names_by_result
138
139         self._filter_virtual_ports(new_results_by_directory)
140         return results_by_directory, new_results_by_directory
141
142     def _filter_virtual_ports(self, new_results_by_directory):
143         for port in _VIRTUAL_PORTS:
144             virtual_directory = _VIRTUAL_PORTS[port][0]
145             if virtual_directory in new_results_by_directory:
146                 real_directory = _VIRTUAL_PORTS[port][1]
147                 if real_directory not in new_results_by_directory:
148                     new_results_by_directory[real_directory] = new_results_by_directory[virtual_directory]
149                 del new_results_by_directory[virtual_directory]
150
151     def _move_baselines(self, baseline_name, results_by_directory, new_results_by_directory):
152         data_for_result = {}
153         for directory, result in results_by_directory.items():
154             if not result in data_for_result:
155                 source = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
156                 data_for_result[result] = self._filesystem.read_binary_file(source)
157
158         file_names = []
159         for directory, result in results_by_directory.items():
160             if new_results_by_directory.get(directory) != result:
161                 file_names.append(self._filesystem.join(self._scm.checkout_root, directory, baseline_name))
162         if file_names:
163             self._scm.delete_list(file_names)
164
165         file_names = []
166         for directory, result in new_results_by_directory.items():
167             if results_by_directory.get(directory) != result:
168                 destination = self._filesystem.join(self._scm.checkout_root, directory, baseline_name)
169                 self._filesystem.maybe_make_directory(self._filesystem.split(destination)[0])
170                 self._filesystem.write_binary_file(destination, data_for_result[result])
171                 file_names.append(destination)
172         if file_names:
173             self._scm.add_list(file_names)
174
175     def directories_by_result(self, baseline_name):
176         results_by_directory = self._read_results_by_directory(baseline_name)
177         return _invert_dictionary(results_by_directory)
178
179     def optimize(self, baseline_name):
180         results_by_directory, new_results_by_directory = self._find_optimal_result_placement(baseline_name)
181         if self._results_by_port_name(results_by_directory) != self._results_by_port_name(new_results_by_directory):
182             return False
183         self._move_baselines(baseline_name, results_by_directory, new_results_by_directory)
184         return True