# Source: Tools/Scripts/webkitpy/common/net/buildbot/buildbot.py
# (WebKit.git, revision 98e2fae784f299f0e05953666f8b21e773f7f61f)
1 # Copyright (c) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for interacting with WebKit's buildbot
30
31 try:
32     import json
33 except ImportError:
34     # python 2.5 compatibility
35     import webkitpy.thirdparty.simplejson as json
36
37 import operator
38 import re
39 import urllib
40 import urllib2
41
42 from webkitpy.common.net.failuremap import FailureMap
43 from webkitpy.common.net.layouttestresults import LayoutTestResults
44 from webkitpy.common.net.regressionwindow import RegressionWindow
45 from webkitpy.common.system.logutils import get_logger
46 from webkitpy.thirdparty.autoinstalled.mechanize import Browser
47 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
48
# Module-level logger named after this file (see webkitpy.common.system.logutils).
_log = get_logger(__file__)
50
51
52 class Builder(object):
53     def __init__(self, name, buildbot):
54         self._name = name
55         self._buildbot = buildbot
56         self._builds_cache = {}
57         self._revision_to_build_number = None
58         self._browser = Browser()
59         self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt
60
61     def name(self):
62         return self._name
63
64     def results_url(self):
65         return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
66
67     def url_encoded_name(self):
68         return urllib.quote(self._name)
69
70     def url(self):
71         return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
72
73     # This provides a single place to mock
74     def _fetch_build(self, build_number):
75         build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
76         if not build_dictionary:
77             return None
78         return Build(self,
79             build_number=int(build_dictionary['number']),
80             revision=int(build_dictionary['sourceStamp']['revision']),
81             is_green=(build_dictionary['results'] == 0) # Undocumented, 0 seems to mean "pass"
82         )
83
84     def build(self, build_number):
85         if not build_number:
86             return None
87         cached_build = self._builds_cache.get(build_number)
88         if cached_build:
89             return cached_build
90
91         build = self._fetch_build(build_number)
92         self._builds_cache[build_number] = build
93         return build
94
95     def force_build(self, username="webkit-patch", comments=None):
96         def predicate(form):
97             try:
98                 return form.find_control("username")
99             except Exception, e:
100                 return False
101         self._browser.open(self.url())
102         self._browser.select_form(predicate=predicate)
103         self._browser["username"] = username
104         if comments:
105             self._browser["comments"] = comments
106         return self._browser.submit()
107
108     file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
109     def _revision_and_build_for_filename(self, filename):
110         # Example: "r47483 (1)/" or "r47483 (1).zip"
111         match = self.file_name_regexp.match(filename)
112         return (int(match.group("revision")), int(match.group("build_number")))
113
114     def _fetch_revision_to_build_map(self):
115         # All _fetch requests go through _buildbot for easier mocking
116         # FIXME: This should use NetworkTransaction's 404 handling instead.
117         try:
118             # FIXME: This method is horribly slow due to the huge network load.
119             # FIXME: This is a poor way to do revision -> build mapping.
120             # Better would be to ask buildbot through some sort of API.
121             print "Loading revision/build list from %s." % self.results_url()
122             print "This may take a while..."
123             result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
124         except urllib2.HTTPError, error:
125             if error.code != 404:
126                 raise
127             result_files = []
128
129         # This assumes there was only one build per revision, which is false but we don't care for now.
130         return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])
131
132     def _revision_to_build_map(self):
133         if not self._revision_to_build_number:
134             self._revision_to_build_number = self._fetch_revision_to_build_map()
135         return self._revision_to_build_number
136
137     def revision_build_pairs_with_results(self):
138         return self._revision_to_build_map().items()
139
140     # This assumes there can be only one build per revision, which is false, but we don't care for now.
141     def build_for_revision(self, revision, allow_failed_lookups=False):
142         # NOTE: This lookup will fail if that exact revision was never built.
143         build_number = self._revision_to_build_map().get(int(revision))
144         if not build_number:
145             return None
146         build = self.build(build_number)
147         if not build and allow_failed_lookups:
148             # Builds for old revisions with fail to lookup via buildbot's json api.
149             build = Build(self,
150                 build_number=build_number,
151                 revision=revision,
152                 is_green=False,
153             )
154         return build
155
156     def find_regression_window(self, red_build, look_back_limit=30):
157         if not red_build or red_build.is_green():
158             return RegressionWindow(None, None)
159         common_failures = None
160         current_build = red_build
161         build_after_current_build = None
162         look_back_count = 0
163         while current_build:
164             if current_build.is_green():
165                 # current_build can't possibly have any failures in common
166                 # with red_build because it's green.
167                 break
168             results = current_build.layout_test_results()
169             # We treat a lack of results as if all the test failed.
170             # This occurs, for example, when we can't compile at all.
171             if results:
172                 failures = set(results.failing_tests())
173                 if common_failures == None:
174                     common_failures = failures
175                 else:
176                     common_failures = common_failures.intersection(failures)
177                     if not common_failures:
178                         # current_build doesn't have any failures in common with
179                         # the red build we're worried about.  We assume that any
180                         # failures in current_build were due to flakiness.
181                         break
182             look_back_count += 1
183             if look_back_count > look_back_limit:
184                 return RegressionWindow(None, current_build, failing_tests=common_failures)
185             build_after_current_build = current_build
186             current_build = current_build.previous_build()
187         # We must iterate at least once because red_build is red.
188         assert(build_after_current_build)
189         # Current build must either be green or have no failures in common
190         # with red build, so we've found our failure transition.
191         return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)
192
193     def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
194         red_build = self.build(red_build_number)
195         regression_window = self.find_regression_window(red_build, look_back_limit)
196         if not regression_window.build_before_failure():
197             return None  # We ran off the limit of our search
198         # If avoid_flakey_tests, require at least 2 bad builds before we
199         # suspect a real failure transition.
200         if avoid_flakey_tests and regression_window.failing_build() == red_build:
201             return None
202         return regression_window
203
204
205 class Build(object):
206     def __init__(self, builder, build_number, revision, is_green):
207         self._builder = builder
208         self._number = build_number
209         self._revision = revision
210         self._is_green = is_green
211         self._layout_test_results = None
212
213     @staticmethod
214     def build_url(builder, build_number):
215         return "%s/builds/%s" % (builder.url(), build_number)
216
217     def url(self):
218         return self.build_url(self.builder(), self._number)
219
220     def results_url(self):
221         results_directory = "r%s (%s)" % (self.revision(), self._number)
222         return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))
223
224     def _fetch_results_html(self):
225         results_html = "%s/results.html" % (self.results_url())
226         # FIXME: This should use NetworkTransaction's 404 handling instead.
227         try:
228             # It seems this can return None if the url redirects and then returns 404.
229             return urllib2.urlopen(results_html)
230         except urllib2.HTTPError, error:
231             if error.code != 404:
232                 raise
233
234     def layout_test_results(self):
235         if not self._layout_test_results:
236             # FIXME: This should cache that the result was a 404 and stop hitting the network.
237             self._layout_test_results = LayoutTestResults.results_from_string(self._fetch_results_html())
238         return self._layout_test_results
239
240     def builder(self):
241         return self._builder
242
243     def revision(self):
244         return self._revision
245
246     def is_green(self):
247         return self._is_green
248
249     def previous_build(self):
250         # previous_build() allows callers to avoid assuming build numbers are sequential.
251         # They may not be sequential across all master changes, or when non-trunk builds are made.
252         return self._builder.build(self._number - 1)
253
254
class BuildBot(object):
    """Facade over a buildbot master (build.webkit.org by default).

    Scrapes builder status pages and the JSON API, and knows which builders
    are "core" (reliable enough to gate landing patches).
    """

    # FIXME: This should move into some sort of webkit_config.py
    default_host = "build.webkit.org"

    def __init__(self, host=default_host):
        self.buildbot_host = host
        # Cache so each builder name maps to a single Builder instance.
        self._builder_by_name = {}

        # If any core builder is red we should not be landing patches.  Other
        # builders should be added to this list once they are known to be
        # reliable.
        # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
        self.core_builder_names_regexps = [
            "SnowLeopard.*Build",
            "SnowLeopard.*\(Test",  # Exclude WebKit2 for now.
            "Leopard",
            "Tiger",
            "Windows.*Build",
            "EFL",
            "GTK.*32",
            "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
            "Qt",
            "Chromium.*Release$",
        ]
279
280     def _parse_last_build_cell(self, builder, cell):
281         status_link = cell.find('a')
282         if status_link:
283             # Will be either a revision number or a build number
284             revision_string = status_link.string
285             # If revision_string has non-digits assume it's not a revision number.
286             builder['built_revision'] = int(revision_string) \
287                                         if not re.match('\D', revision_string) \
288                                         else None
289
290             # FIXME: We treat slave lost as green even though it is not to
291             # work around the Qts bot being on a broken internet connection.
292             # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
293             builder['is_green'] = not re.search('fail', cell.renderContents()) or \
294                                   not not re.search('lost', cell.renderContents())
295
296             status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
297             link_match = re.match(status_link_regexp, status_link['href'])
298             builder['build_number'] = int(link_match.group("build_number"))
299         else:
300             # We failed to find a link in the first cell, just give up.  This
301             # can happen if a builder is just-added, the first cell will just
302             # be "no build"
303             # Other parts of the code depend on is_green being present.
304             builder['is_green'] = False
305             builder['built_revision'] = None
306             builder['build_number'] = None
307
308     def _parse_current_build_cell(self, builder, cell):
309         activity_lines = cell.renderContents().split("<br />")
310         builder["activity"] = activity_lines[0] # normally "building" or "idle"
311         # The middle lines document how long left for any current builds.
312         match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
313         builder["pending_builds"] = int(match.group("pending_builds")) if match else 0
314
315     def _parse_builder_status_from_row(self, status_row):
316         status_cells = status_row.findAll('td')
317         builder = {}
318
319         # First cell is the name
320         name_link = status_cells[0].find('a')
321         builder["name"] = unicode(name_link.string)
322
323         self._parse_last_build_cell(builder, status_cells[1])
324         self._parse_current_build_cell(builder, status_cells[2])
325         return builder
326
327     def _matches_regexps(self, builder_name, name_regexps):
328         for name_regexp in name_regexps:
329             if re.match(name_regexp, builder_name):
330                 return True
331         return False
332
    # FIXME: Should move onto Builder
    def _is_core_builder(self, builder_name):
        """True if builder_name names a "core" (tree-gating) builder."""
        return self._matches_regexps(builder_name, self.core_builder_names_regexps)
336
    # FIXME: This method needs to die, but is used by a unit test at the moment.
    def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
        """Filter status dicts down to those whose name matches any regexp."""
        return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]
340
    def red_core_builders(self):
        """Status dicts for core builders which are currently not green."""
        return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]
343
    def red_core_builders_names(self):
        """Names of core builders which are currently not green."""
        return [builder["name"] for builder in self.red_core_builders()]
346
    def idle_red_core_builders(self):
        """Status dicts for red core builders which are not currently building."""
        return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]
349
    def core_builders_are_green(self):
        """True when no core builder is red (i.e. it's OK to land patches)."""
        return not self.red_core_builders()
352
    # FIXME: These _fetch methods should move to a networking class.
    def _fetch_build_dictionary(self, builder, build_number):
        """Fetch one build's description from the buildbot JSON API.

        Returns the decoded dictionary, or None on network or decode
        errors (both are logged rather than raised).
        """
        try:
            base = "http://%s" % self.buildbot_host
            path = urllib.quote("json/builders/%s/builds/%s" % (builder.name(),
                                                                build_number))
            url = "%s/%s" % (base, path)
            jsondata = urllib2.urlopen(url)
            return json.load(jsondata)
        except urllib2.URLError, err:
            build_url = Build.build_url(builder, build_number)
            _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
            return None
        except ValueError, err:
            # json.load raises ValueError when the payload isn't valid JSON.
            build_url = Build.build_url(builder, build_number)
            _log.error("Error decoding json data from %s: %s" % (build_url, err))
            return None
370
    def _fetch_one_box_per_builder(self):
        """Open the one_box_per_builder summary page (one <tr> per builder)."""
        build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
        return urllib2.urlopen(build_status_url)
374
375     def _file_cell_text(self, file_cell):
376         """Traverses down through firstChild elements until one containing a string is found, then returns that string"""
377         element = file_cell
378         while element.string is None and element.contents:
379             element = element.contents[0]
380         return element.string
381
382     def _parse_twisted_file_row(self, file_row):
383         string_or_empty = lambda string: unicode(string) if string else u""
384         file_cells = file_row.findAll('td')
385         return {
386             "filename": string_or_empty(self._file_cell_text(file_cells[0])),
387             "size": string_or_empty(self._file_cell_text(file_cells[1])),
388             "type": string_or_empty(self._file_cell_text(file_cells[2])),
389             "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
390         }
391
    def _parse_twisted_directory_listing(self, page):
        """Parse twisted's HTML directory listing into a list of row dicts."""
        soup = BeautifulSoup(page)
        # HACK: Match only table rows with a class to ignore twisted header/footer rows.
        file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
        return [self._parse_twisted_file_row(file_row) for file_row in file_rows]
397
    # FIXME: There should be a better way to get this information directly from twisted.
    def _fetch_twisted_directory_listing(self, url):
        """Fetch and parse the twisted directory listing at url."""
        return self._parse_twisted_directory_listing(urllib2.urlopen(url))
401
    def builders(self):
        """Builder objects for every builder this master reports."""
        return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]
404
    # This method pulls from /one_box_per_builder as an efficient way to get
    # the current state of every builder in a single request.
    def builder_statuses(self):
        """Parse /one_box_per_builder into a list of builder status dicts."""
        soup = BeautifulSoup(self._fetch_one_box_per_builder())
        return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
409
    def core_builder_statuses(self):
        """Status dicts restricted to the core (tree-gating) builders."""
        return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]
412
413     def builder_with_name(self, name):
414         builder = self._builder_by_name.get(name)
415         if not builder:
416             builder = Builder(name, self)
417             self._builder_by_name[name] = builder
418         return builder
419
420     def failure_map(self, only_core_builders=True):
421         builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
422         failure_map = FailureMap()
423         revision_to_failing_bots = {}
424         for builder_status in builder_statuses:
425             if builder_status["is_green"]:
426                 continue
427             builder = self.builder_with_name(builder_status["name"])
428             regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
429             if regression_window:
430                 failure_map.add_regression_window(builder, regression_window)
431         return failure_map
432
    # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
    # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
    def _latest_builds_from_builders(self, only_core_builders=True):
        """Return the most recent Build for each (core) builder."""
        builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
        return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]
438
439     def _build_at_or_before_revision(self, build, revision):
440         while build:
441             if build.revision() <= revision:
442                 return build
443             build = build.previous_build()
444
445     def last_green_revision(self, only_core_builders=True):
446         builds = self._latest_builds_from_builders(only_core_builders)
447         target_revision = builds[0].revision()
448         # An alternate way to do this would be to start at one revision and walk backwards
449         # checking builder.build_for_revision, however build_for_revision is very slow on first load.
450         while True:
451             # Make builds agree on revision
452             builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
453             if None in builds: # One of the builds failed to load from the server.
454                 return None
455             min_revision = min(map(lambda build: build.revision(), builds))
456             if min_revision != target_revision:
457                 target_revision = min_revision
458                 continue # Builds don't all agree on revision, keep searching
459             # Check to make sure they're all green
460             all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
461             if not all_are_green:
462                 target_revision -= 1
463                 continue
464             return min_revision