# Scraped-listing header (not part of the original source):
# 2011-02-06 Maciej Stachowiak <mjs@apple.com>
# WebKit.git / Tools / Scripts / webkitpy / common / net / buildbot / buildbot.py
# Copyright (c) 2009, Google Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# WebKit's Python module for interacting with WebKit's buildbot

try:
    import json
except ImportError:
    # python 2.5 compatibility
    import webkitpy.thirdparty.simplejson as json

import operator
import re
import urllib
import urllib2

from webkitpy.common.net.failuremap import FailureMap
from webkitpy.common.net.layouttestresults import LayoutTestResults
from webkitpy.common.net.regressionwindow import RegressionWindow
from webkitpy.common.system.logutils import get_logger
from webkitpy.thirdparty.autoinstalled.mechanize import Browser
from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup

# Module-level logger for this file.
_log = get_logger(__file__)


52 class Builder(object):
53     def __init__(self, name, buildbot):
54         self._name = name
55         self._buildbot = buildbot
56         self._builds_cache = {}
57         self._revision_to_build_number = None
58         self._browser = Browser()
59         self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt
60
61     def name(self):
62         return self._name
63
64     def results_url(self):
65         return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
66
67     def url_encoded_name(self):
68         return urllib.quote(self._name)
69
70     def url(self):
71         return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
72
73     # This provides a single place to mock
74     def _fetch_build(self, build_number):
75         build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
76         if not build_dictionary:
77             return None
78         return Build(self,
79             build_number=int(build_dictionary['number']),
80             revision=int(build_dictionary['sourceStamp']['revision']),
81             is_green=(build_dictionary['results'] == 0) # Undocumented, 0 seems to mean "pass"
82         )
83
84     def build(self, build_number):
85         if not build_number:
86             return None
87         cached_build = self._builds_cache.get(build_number)
88         if cached_build:
89             return cached_build
90
91         build = self._fetch_build(build_number)
92         self._builds_cache[build_number] = build
93         return build
94
95     def force_build(self, username="webkit-patch", comments=None):
96         def predicate(form):
97             try:
98                 return form.find_control("username")
99             except Exception, e:
100                 return False
101         self._browser.open(self.url())
102         self._browser.select_form(predicate=predicate)
103         self._browser["username"] = username
104         if comments:
105             self._browser["comments"] = comments
106         return self._browser.submit()
107
108     file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
109     def _revision_and_build_for_filename(self, filename):
110         # Example: "r47483 (1)/" or "r47483 (1).zip"
111         match = self.file_name_regexp.match(filename)
112         return (int(match.group("revision")), int(match.group("build_number")))
113
114     def _fetch_revision_to_build_map(self):
115         # All _fetch requests go through _buildbot for easier mocking
116         # FIXME: This should use NetworkTransaction's 404 handling instead.
117         try:
118             # FIXME: This method is horribly slow due to the huge network load.
119             # FIXME: This is a poor way to do revision -> build mapping.
120             # Better would be to ask buildbot through some sort of API.
121             print "Loading revision/build list from %s." % self.results_url()
122             print "This may take a while..."
123             result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
124         except urllib2.HTTPError, error:
125             if error.code != 404:
126                 raise
127             result_files = []
128
129         # This assumes there was only one build per revision, which is false but we don't care for now.
130         return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])
131
132     def _revision_to_build_map(self):
133         if not self._revision_to_build_number:
134             self._revision_to_build_number = self._fetch_revision_to_build_map()
135         return self._revision_to_build_number
136
137     def revision_build_pairs_with_results(self):
138         return self._revision_to_build_map().items()
139
140     # This assumes there can be only one build per revision, which is false, but we don't care for now.
141     def build_for_revision(self, revision, allow_failed_lookups=False):
142         # NOTE: This lookup will fail if that exact revision was never built.
143         build_number = self._revision_to_build_map().get(int(revision))
144         if not build_number:
145             return None
146         build = self.build(build_number)
147         if not build and allow_failed_lookups:
148             # Builds for old revisions with fail to lookup via buildbot's json api.
149             build = Build(self,
150                 build_number=build_number,
151                 revision=revision,
152                 is_green=False,
153             )
154         return build
155
156     def find_regression_window(self, red_build, look_back_limit=30):
157         if not red_build or red_build.is_green():
158             return RegressionWindow(None, None)
159         common_failures = None
160         current_build = red_build
161         build_after_current_build = None
162         look_back_count = 0
163         while current_build:
164             if current_build.is_green():
165                 # current_build can't possibly have any failures in common
166                 # with red_build because it's green.
167                 break
168             results = current_build.layout_test_results()
169             # We treat a lack of results as if all the test failed.
170             # This occurs, for example, when we can't compile at all.
171             if results:
172                 failures = set(results.failing_tests())
173                 if common_failures == None:
174                     common_failures = failures
175                 else:
176                     common_failures = common_failures.intersection(failures)
177                     if not common_failures:
178                         # current_build doesn't have any failures in common with
179                         # the red build we're worried about.  We assume that any
180                         # failures in current_build were due to flakiness.
181                         break
182             look_back_count += 1
183             if look_back_count > look_back_limit:
184                 return RegressionWindow(None, current_build, failing_tests=common_failures)
185             build_after_current_build = current_build
186             current_build = current_build.previous_build()
187         # We must iterate at least once because red_build is red.
188         assert(build_after_current_build)
189         # Current build must either be green or have no failures in common
190         # with red build, so we've found our failure transition.
191         return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)
192
193     def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
194         red_build = self.build(red_build_number)
195         regression_window = self.find_regression_window(red_build, look_back_limit)
196         if not regression_window.build_before_failure():
197             return None  # We ran off the limit of our search
198         # If avoid_flakey_tests, require at least 2 bad builds before we
199         # suspect a real failure transition.
200         if avoid_flakey_tests and regression_window.failing_build() == red_build:
201             return None
202         return regression_window
203
204
205 class Build(object):
206     def __init__(self, builder, build_number, revision, is_green):
207         self._builder = builder
208         self._number = build_number
209         self._revision = revision
210         self._is_green = is_green
211         self._layout_test_results = None
212
213     @staticmethod
214     def build_url(builder, build_number):
215         return "%s/builds/%s" % (builder.url(), build_number)
216
217     def url(self):
218         return self.build_url(self.builder(), self._number)
219
220     def results_url(self):
221         results_directory = "r%s (%s)" % (self.revision(), self._number)
222         return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))
223
224     def _fetch_results_html(self):
225         results_html = "%s/results.html" % (self.results_url())
226         # FIXME: This should use NetworkTransaction's 404 handling instead.
227         try:
228             # It seems this can return None if the url redirects and then returns 404.
229             return urllib2.urlopen(results_html)
230         except urllib2.HTTPError, error:
231             if error.code != 404:
232                 raise
233
234     def layout_test_results(self):
235         if not self._layout_test_results:
236             # FIXME: This should cache that the result was a 404 and stop hitting the network.
237             self._layout_test_results = LayoutTestResults.results_from_string(self._fetch_results_html())
238         return self._layout_test_results
239
240     def builder(self):
241         return self._builder
242
243     def revision(self):
244         return self._revision
245
246     def is_green(self):
247         return self._is_green
248
249     def previous_build(self):
250         # previous_build() allows callers to avoid assuming build numbers are sequential.
251         # They may not be sequential across all master changes, or when non-trunk builds are made.
252         return self._builder.build(self._number - 1)
253
254
255 class BuildBot(object):
256     # FIXME: This should move into some sort of webkit_config.py
257     default_host = "build.webkit.org"
258
259     def __init__(self, host=default_host):
260         self.buildbot_host = host
261         self._builder_by_name = {}
262
263         # If any core builder is red we should not be landing patches.  Other
264         # builders should be added to this list once they are known to be
265         # reliable.
266         # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
267         self.core_builder_names_regexps = [
268             "SnowLeopard.*Build",
269             "SnowLeopard.*\(Test",
270             "SnowLeopard.*\(WebKit2 Test",
271             "Leopard",
272             "Tiger",
273             "Windows.*Build",
274             "EFL",
275             "GTK.*32",
276             "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
277             "Qt",
278             "Chromium.*Release$",
279         ]
280
281     def _parse_last_build_cell(self, builder, cell):
282         status_link = cell.find('a')
283         if status_link:
284             # Will be either a revision number or a build number
285             revision_string = status_link.string
286             # If revision_string has non-digits assume it's not a revision number.
287             builder['built_revision'] = int(revision_string) \
288                                         if not re.match('\D', revision_string) \
289                                         else None
290
291             # FIXME: We treat slave lost as green even though it is not to
292             # work around the Qts bot being on a broken internet connection.
293             # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
294             builder['is_green'] = not re.search('fail', cell.renderContents()) or \
295                                   not not re.search('lost', cell.renderContents())
296
297             status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
298             link_match = re.match(status_link_regexp, status_link['href'])
299             builder['build_number'] = int(link_match.group("build_number"))
300         else:
301             # We failed to find a link in the first cell, just give up.  This
302             # can happen if a builder is just-added, the first cell will just
303             # be "no build"
304             # Other parts of the code depend on is_green being present.
305             builder['is_green'] = False
306             builder['built_revision'] = None
307             builder['build_number'] = None
308
309     def _parse_current_build_cell(self, builder, cell):
310         activity_lines = cell.renderContents().split("<br />")
311         builder["activity"] = activity_lines[0] # normally "building" or "idle"
312         # The middle lines document how long left for any current builds.
313         match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
314         builder["pending_builds"] = int(match.group("pending_builds")) if match else 0
315
316     def _parse_builder_status_from_row(self, status_row):
317         status_cells = status_row.findAll('td')
318         builder = {}
319
320         # First cell is the name
321         name_link = status_cells[0].find('a')
322         builder["name"] = unicode(name_link.string)
323
324         self._parse_last_build_cell(builder, status_cells[1])
325         self._parse_current_build_cell(builder, status_cells[2])
326         return builder
327
328     def _matches_regexps(self, builder_name, name_regexps):
329         for name_regexp in name_regexps:
330             if re.match(name_regexp, builder_name):
331                 return True
332         return False
333
334     # FIXME: Should move onto Builder
335     def _is_core_builder(self, builder_name):
336         return self._matches_regexps(builder_name, self.core_builder_names_regexps)
337
338     # FIXME: This method needs to die, but is used by a unit test at the moment.
339     def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
340         return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]
341
342     def red_core_builders(self):
343         return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]
344
345     def red_core_builders_names(self):
346         return [builder["name"] for builder in self.red_core_builders()]
347
348     def idle_red_core_builders(self):
349         return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]
350
351     def core_builders_are_green(self):
352         return not self.red_core_builders()
353
354     # FIXME: These _fetch methods should move to a networking class.
355     def _fetch_build_dictionary(self, builder, build_number):
356         try:
357             base = "http://%s" % self.buildbot_host
358             path = urllib.quote("json/builders/%s/builds/%s" % (builder.name(),
359                                                                 build_number))
360             url = "%s/%s" % (base, path)
361             jsondata = urllib2.urlopen(url)
362             return json.load(jsondata)
363         except urllib2.URLError, err:
364             build_url = Build.build_url(builder, build_number)
365             _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
366             return None
367         except ValueError, err:
368             build_url = Build.build_url(builder, build_number)
369             _log.error("Error decoding json data from %s: %s" % (build_url, err))
370             return None
371
372     def _fetch_one_box_per_builder(self):
373         build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
374         return urllib2.urlopen(build_status_url)
375
376     def _file_cell_text(self, file_cell):
377         """Traverses down through firstChild elements until one containing a string is found, then returns that string"""
378         element = file_cell
379         while element.string is None and element.contents:
380             element = element.contents[0]
381         return element.string
382
383     def _parse_twisted_file_row(self, file_row):
384         string_or_empty = lambda string: unicode(string) if string else u""
385         file_cells = file_row.findAll('td')
386         return {
387             "filename": string_or_empty(self._file_cell_text(file_cells[0])),
388             "size": string_or_empty(self._file_cell_text(file_cells[1])),
389             "type": string_or_empty(self._file_cell_text(file_cells[2])),
390             "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
391         }
392
393     def _parse_twisted_directory_listing(self, page):
394         soup = BeautifulSoup(page)
395         # HACK: Match only table rows with a class to ignore twisted header/footer rows.
396         file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
397         return [self._parse_twisted_file_row(file_row) for file_row in file_rows]
398
399     # FIXME: There should be a better way to get this information directly from twisted.
400     def _fetch_twisted_directory_listing(self, url):
401         return self._parse_twisted_directory_listing(urllib2.urlopen(url))
402
403     def builders(self):
404         return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]
405
406     # This method pulls from /one_box_per_builder as an efficient way to get information about
407     def builder_statuses(self):
408         soup = BeautifulSoup(self._fetch_one_box_per_builder())
409         return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
410
411     def core_builder_statuses(self):
412         return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]
413
414     def builder_with_name(self, name):
415         builder = self._builder_by_name.get(name)
416         if not builder:
417             builder = Builder(name, self)
418             self._builder_by_name[name] = builder
419         return builder
420
421     def failure_map(self, only_core_builders=True):
422         builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
423         failure_map = FailureMap()
424         revision_to_failing_bots = {}
425         for builder_status in builder_statuses:
426             if builder_status["is_green"]:
427                 continue
428             builder = self.builder_with_name(builder_status["name"])
429             regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
430             if regression_window:
431                 failure_map.add_regression_window(builder, regression_window)
432         return failure_map
433
434     # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
435     # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
436     def _latest_builds_from_builders(self, only_core_builders=True):
437         builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
438         return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]
439
440     def _build_at_or_before_revision(self, build, revision):
441         while build:
442             if build.revision() <= revision:
443                 return build
444             build = build.previous_build()
445
446     def last_green_revision(self, only_core_builders=True):
447         builds = self._latest_builds_from_builders(only_core_builders)
448         target_revision = builds[0].revision()
449         # An alternate way to do this would be to start at one revision and walk backwards
450         # checking builder.build_for_revision, however build_for_revision is very slow on first load.
451         while True:
452             # Make builds agree on revision
453             builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
454             if None in builds: # One of the builds failed to load from the server.
455                 return None
456             min_revision = min(map(lambda build: build.revision(), builds))
457             if min_revision != target_revision:
458                 target_revision = min_revision
459                 continue # Builds don't all agree on revision, keep searching
460             # Check to make sure they're all green
461             all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
462             if not all_are_green:
463                 target_revision -= 1
464                 continue
465             return min_revision