aa5fabb256063d2b9606eda9d2fbbd46c1e652b8
[WebKit-https.git] / Tools / Scripts / webkitpy / common / net / buildbot / buildbot.py
1 # Copyright (c) 2009, Google Inc. All rights reserved.
2 #
3 # Redistribution and use in source and binary forms, with or without
4 # modification, are permitted provided that the following conditions are
5 # met:
6 #
7 #     * Redistributions of source code must retain the above copyright
8 # notice, this list of conditions and the following disclaimer.
9 #     * Redistributions in binary form must reproduce the above
10 # copyright notice, this list of conditions and the following disclaimer
11 # in the documentation and/or other materials provided with the
12 # distribution.
13 #     * Neither the name of Google Inc. nor the names of its
14 # contributors may be used to endorse or promote products derived from
15 # this software without specific prior written permission.
16 #
17 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #
29 # WebKit's Python module for interacting with WebKit's buildbot
30
31 try:
32     import json
33 except ImportError:
34     # python 2.5 compatibility
35     import webkitpy.thirdparty.simplejson as json
36
37 import operator
38 import re
39 import urllib
40 import urllib2
41
42 from webkitpy.common.net.failuremap import FailureMap
43 from webkitpy.common.net.layouttestresults import LayoutTestResults
44 from webkitpy.common.net.regressionwindow import RegressionWindow
45 from webkitpy.common.system.logutils import get_logger
46 from webkitpy.thirdparty.autoinstalled.mechanize import Browser
47 from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
48
# Module-level logger; get_logger derives the logger name from this file's path.
_log = get_logger(__file__)
50
51
52 class Builder(object):
53     def __init__(self, name, buildbot):
54         self._name = name
55         self._buildbot = buildbot
56         self._builds_cache = {}
57         self._revision_to_build_number = None
58         self._browser = Browser()
59         self._browser.set_handle_robots(False) # The builder pages are excluded by robots.txt
60
61     def name(self):
62         return self._name
63
64     def results_url(self):
65         return "http://%s/results/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
66
67     def url_encoded_name(self):
68         return urllib.quote(self._name)
69
70     def url(self):
71         return "http://%s/builders/%s" % (self._buildbot.buildbot_host, self.url_encoded_name())
72
73     # This provides a single place to mock
74     def _fetch_build(self, build_number):
75         build_dictionary = self._buildbot._fetch_build_dictionary(self, build_number)
76         if not build_dictionary:
77             return None
78         return Build(self,
79             build_number=int(build_dictionary['number']),
80             revision=int(build_dictionary['sourceStamp']['revision']),
81             is_green=(build_dictionary['results'] == 0) # Undocumented, 0 seems to mean "pass"
82         )
83
84     def build(self, build_number):
85         if not build_number:
86             return None
87         cached_build = self._builds_cache.get(build_number)
88         if cached_build:
89             return cached_build
90
91         build = self._fetch_build(build_number)
92         self._builds_cache[build_number] = build
93         return build
94
95     def force_build(self, username="webkit-patch", comments=None):
96         def predicate(form):
97             try:
98                 return form.find_control("username")
99             except Exception, e:
100                 return False
101         self._browser.open(self.url())
102         self._browser.select_form(predicate=predicate)
103         self._browser["username"] = username
104         if comments:
105             self._browser["comments"] = comments
106         return self._browser.submit()
107
108     file_name_regexp = re.compile(r"r(?P<revision>\d+) \((?P<build_number>\d+)\)")
109     def _revision_and_build_for_filename(self, filename):
110         # Example: "r47483 (1)/" or "r47483 (1).zip"
111         match = self.file_name_regexp.match(filename)
112         return (int(match.group("revision")), int(match.group("build_number")))
113
114     def _fetch_revision_to_build_map(self):
115         # All _fetch requests go through _buildbot for easier mocking
116         # FIXME: This should use NetworkTransaction's 404 handling instead.
117         try:
118             # FIXME: This method is horribly slow due to the huge network load.
119             # FIXME: This is a poor way to do revision -> build mapping.
120             # Better would be to ask buildbot through some sort of API.
121             print "Loading revision/build list from %s." % self.results_url()
122             print "This may take a while..."
123             result_files = self._buildbot._fetch_twisted_directory_listing(self.results_url())
124         except urllib2.HTTPError, error:
125             if error.code != 404:
126                 raise
127             result_files = []
128
129         # This assumes there was only one build per revision, which is false but we don't care for now.
130         return dict([self._revision_and_build_for_filename(file_info["filename"]) for file_info in result_files])
131
132     def _revision_to_build_map(self):
133         if not self._revision_to_build_number:
134             self._revision_to_build_number = self._fetch_revision_to_build_map()
135         return self._revision_to_build_number
136
137     def revision_build_pairs_with_results(self):
138         return self._revision_to_build_map().items()
139
140     # This assumes there can be only one build per revision, which is false, but we don't care for now.
141     def build_for_revision(self, revision, allow_failed_lookups=False):
142         # NOTE: This lookup will fail if that exact revision was never built.
143         build_number = self._revision_to_build_map().get(int(revision))
144         if not build_number:
145             return None
146         build = self.build(build_number)
147         if not build and allow_failed_lookups:
148             # Builds for old revisions with fail to lookup via buildbot's json api.
149             build = Build(self,
150                 build_number=build_number,
151                 revision=revision,
152                 is_green=False,
153             )
154         return build
155
156     def find_regression_window(self, red_build, look_back_limit=30):
157         if not red_build or red_build.is_green():
158             return RegressionWindow(None, None)
159         common_failures = None
160         current_build = red_build
161         build_after_current_build = None
162         look_back_count = 0
163         while current_build:
164             if current_build.is_green():
165                 # current_build can't possibly have any failures in common
166                 # with red_build because it's green.
167                 break
168             results = current_build.layout_test_results()
169             # We treat a lack of results as if all the test failed.
170             # This occurs, for example, when we can't compile at all.
171             if results:
172                 failures = set(results.failing_tests())
173                 if common_failures == None:
174                     common_failures = failures
175                 else:
176                     common_failures = common_failures.intersection(failures)
177                     if not common_failures:
178                         # current_build doesn't have any failures in common with
179                         # the red build we're worried about.  We assume that any
180                         # failures in current_build were due to flakiness.
181                         break
182             look_back_count += 1
183             if look_back_count > look_back_limit:
184                 return RegressionWindow(None, current_build, failing_tests=common_failures)
185             build_after_current_build = current_build
186             current_build = current_build.previous_build()
187         # We must iterate at least once because red_build is red.
188         assert(build_after_current_build)
189         # Current build must either be green or have no failures in common
190         # with red build, so we've found our failure transition.
191         return RegressionWindow(current_build, build_after_current_build, failing_tests=common_failures)
192
193     def find_blameworthy_regression_window(self, red_build_number, look_back_limit=30, avoid_flakey_tests=True):
194         red_build = self.build(red_build_number)
195         regression_window = self.find_regression_window(red_build, look_back_limit)
196         if not regression_window.build_before_failure():
197             return None  # We ran off the limit of our search
198         # If avoid_flakey_tests, require at least 2 bad builds before we
199         # suspect a real failure transition.
200         if avoid_flakey_tests and regression_window.failing_build() == red_build:
201             return None
202         return regression_window
203
204
205 class Build(object):
206     def __init__(self, builder, build_number, revision, is_green):
207         self._builder = builder
208         self._number = build_number
209         self._revision = revision
210         self._is_green = is_green
211         self._layout_test_results = None
212
213     @staticmethod
214     def build_url(builder, build_number):
215         return "%s/builds/%s" % (builder.url(), build_number)
216
217     def url(self):
218         return self.build_url(self.builder(), self._number)
219
220     def results_url(self):
221         results_directory = "r%s (%s)" % (self.revision(), self._number)
222         return "%s/%s" % (self._builder.results_url(), urllib.quote(results_directory))
223
224     def _fetch_results_html(self):
225         results_html = "%s/results.html" % (self.results_url())
226         # FIXME: This should use NetworkTransaction's 404 handling instead.
227         try:
228             # It seems this can return None if the url redirects and then returns 404.
229             return urllib2.urlopen(results_html)
230         except urllib2.HTTPError, error:
231             if error.code != 404:
232                 raise
233
234     def layout_test_results(self):
235         if not self._layout_test_results:
236             # FIXME: This should cache that the result was a 404 and stop hitting the network.
237             self._layout_test_results = LayoutTestResults.results_from_string(self._fetch_results_html())
238         return self._layout_test_results
239
240     def builder(self):
241         return self._builder
242
243     def revision(self):
244         return self._revision
245
246     def is_green(self):
247         return self._is_green
248
249     def previous_build(self):
250         # previous_build() allows callers to avoid assuming build numbers are sequential.
251         # They may not be sequential across all master changes, or when non-trunk builds are made.
252         return self._builder.build(self._number - 1)
253
254
255 class BuildBot(object):
256     # FIXME: This should move into some sort of webkit_config.py
257     default_host = "build.webkit.org"
258
259     def __init__(self, host=default_host):
260         self.buildbot_host = host
261         self._builder_by_name = {}
262
263         # If any core builder is red we should not be landing patches.  Other
264         # builders should be added to this list once they are known to be
265         # reliable.
266         # See https://bugs.webkit.org/show_bug.cgi?id=33296 and related bugs.
267         self.core_builder_names_regexps = [
268             "SnowLeopard.*Build",
269             "SnowLeopard.*\(Test",
270             "SnowLeopard.*\(WebKit2 Test",
271             "Leopard.*Release",
272             "Windows.*Build",
273             "Windows.*Release.*\(Test",
274             "WinCE",
275             "EFL",
276             "GTK.*32",
277             "GTK.*64.*Debug",  # Disallow the 64-bit Release bot which is broken.
278             "Qt",
279             "Chromium.*Release$",
280         ]
281
282     def _parse_last_build_cell(self, builder, cell):
283         status_link = cell.find('a')
284         if status_link:
285             # Will be either a revision number or a build number
286             revision_string = status_link.string
287             # If revision_string has non-digits assume it's not a revision number.
288             builder['built_revision'] = int(revision_string) \
289                                         if not re.match('\D', revision_string) \
290                                         else None
291
292             # FIXME: We treat slave lost as green even though it is not to
293             # work around the Qts bot being on a broken internet connection.
294             # The real fix is https://bugs.webkit.org/show_bug.cgi?id=37099
295             builder['is_green'] = not re.search('fail', cell.renderContents()) or \
296                                   not not re.search('lost', cell.renderContents())
297
298             status_link_regexp = r"builders/(?P<builder_name>.*)/builds/(?P<build_number>\d+)"
299             link_match = re.match(status_link_regexp, status_link['href'])
300             builder['build_number'] = int(link_match.group("build_number"))
301         else:
302             # We failed to find a link in the first cell, just give up.  This
303             # can happen if a builder is just-added, the first cell will just
304             # be "no build"
305             # Other parts of the code depend on is_green being present.
306             builder['is_green'] = False
307             builder['built_revision'] = None
308             builder['build_number'] = None
309
310     def _parse_current_build_cell(self, builder, cell):
311         activity_lines = cell.renderContents().split("<br />")
312         builder["activity"] = activity_lines[0] # normally "building" or "idle"
313         # The middle lines document how long left for any current builds.
314         match = re.match("(?P<pending_builds>\d) pending", activity_lines[-1])
315         builder["pending_builds"] = int(match.group("pending_builds")) if match else 0
316
317     def _parse_builder_status_from_row(self, status_row):
318         status_cells = status_row.findAll('td')
319         builder = {}
320
321         # First cell is the name
322         name_link = status_cells[0].find('a')
323         builder["name"] = unicode(name_link.string)
324
325         self._parse_last_build_cell(builder, status_cells[1])
326         self._parse_current_build_cell(builder, status_cells[2])
327         return builder
328
329     def _matches_regexps(self, builder_name, name_regexps):
330         for name_regexp in name_regexps:
331             if re.match(name_regexp, builder_name):
332                 return True
333         return False
334
335     # FIXME: Should move onto Builder
336     def _is_core_builder(self, builder_name):
337         return self._matches_regexps(builder_name, self.core_builder_names_regexps)
338
339     # FIXME: This method needs to die, but is used by a unit test at the moment.
340     def _builder_statuses_with_names_matching_regexps(self, builder_statuses, name_regexps):
341         return [builder for builder in builder_statuses if self._matches_regexps(builder["name"], name_regexps)]
342
343     def red_core_builders(self):
344         return [builder for builder in self.core_builder_statuses() if not builder["is_green"]]
345
346     def red_core_builders_names(self):
347         return [builder["name"] for builder in self.red_core_builders()]
348
349     def idle_red_core_builders(self):
350         return [builder for builder in self.red_core_builders() if builder["activity"] == "idle"]
351
352     def core_builders_are_green(self):
353         return not self.red_core_builders()
354
355     # FIXME: These _fetch methods should move to a networking class.
356     def _fetch_build_dictionary(self, builder, build_number):
357         try:
358             base = "http://%s" % self.buildbot_host
359             path = urllib.quote("json/builders/%s/builds/%s" % (builder.name(),
360                                                                 build_number))
361             url = "%s/%s" % (base, path)
362             jsondata = urllib2.urlopen(url)
363             return json.load(jsondata)
364         except urllib2.URLError, err:
365             build_url = Build.build_url(builder, build_number)
366             _log.error("Error fetching data for %s build %s (%s): %s" % (builder.name(), build_number, build_url, err))
367             return None
368         except ValueError, err:
369             build_url = Build.build_url(builder, build_number)
370             _log.error("Error decoding json data from %s: %s" % (build_url, err))
371             return None
372
373     def _fetch_one_box_per_builder(self):
374         build_status_url = "http://%s/one_box_per_builder" % self.buildbot_host
375         return urllib2.urlopen(build_status_url)
376
377     def _file_cell_text(self, file_cell):
378         """Traverses down through firstChild elements until one containing a string is found, then returns that string"""
379         element = file_cell
380         while element.string is None and element.contents:
381             element = element.contents[0]
382         return element.string
383
384     def _parse_twisted_file_row(self, file_row):
385         string_or_empty = lambda string: unicode(string) if string else u""
386         file_cells = file_row.findAll('td')
387         return {
388             "filename": string_or_empty(self._file_cell_text(file_cells[0])),
389             "size": string_or_empty(self._file_cell_text(file_cells[1])),
390             "type": string_or_empty(self._file_cell_text(file_cells[2])),
391             "encoding": string_or_empty(self._file_cell_text(file_cells[3])),
392         }
393
394     def _parse_twisted_directory_listing(self, page):
395         soup = BeautifulSoup(page)
396         # HACK: Match only table rows with a class to ignore twisted header/footer rows.
397         file_rows = soup.find('table').findAll('tr', {'class': re.compile(r'\b(?:directory|file)\b')})
398         return [self._parse_twisted_file_row(file_row) for file_row in file_rows]
399
400     # FIXME: There should be a better way to get this information directly from twisted.
401     def _fetch_twisted_directory_listing(self, url):
402         return self._parse_twisted_directory_listing(urllib2.urlopen(url))
403
404     def builders(self):
405         return [self.builder_with_name(status["name"]) for status in self.builder_statuses()]
406
407     # This method pulls from /one_box_per_builder as an efficient way to get information about
408     def builder_statuses(self):
409         soup = BeautifulSoup(self._fetch_one_box_per_builder())
410         return [self._parse_builder_status_from_row(status_row) for status_row in soup.find('table').findAll('tr')]
411
412     def core_builder_statuses(self):
413         return [builder for builder in self.builder_statuses() if self._is_core_builder(builder["name"])]
414
415     def builder_with_name(self, name):
416         builder = self._builder_by_name.get(name)
417         if not builder:
418             builder = Builder(name, self)
419             self._builder_by_name[name] = builder
420         return builder
421
422     def failure_map(self, only_core_builders=True):
423         builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
424         failure_map = FailureMap()
425         revision_to_failing_bots = {}
426         for builder_status in builder_statuses:
427             if builder_status["is_green"]:
428                 continue
429             builder = self.builder_with_name(builder_status["name"])
430             regression_window = builder.find_blameworthy_regression_window(builder_status["build_number"])
431             if regression_window:
432                 failure_map.add_regression_window(builder, regression_window)
433         return failure_map
434
435     # This makes fewer requests than calling Builder.latest_build would.  It grabs all builder
436     # statuses in one request using self.builder_statuses (fetching /one_box_per_builder instead of builder pages).
437     def _latest_builds_from_builders(self, only_core_builders=True):
438         builder_statuses = self.core_builder_statuses() if only_core_builders else self.builder_statuses()
439         return [self.builder_with_name(status["name"]).build(status["build_number"]) for status in builder_statuses]
440
441     def _build_at_or_before_revision(self, build, revision):
442         while build:
443             if build.revision() <= revision:
444                 return build
445             build = build.previous_build()
446
447     def last_green_revision(self, only_core_builders=True):
448         builds = self._latest_builds_from_builders(only_core_builders)
449         target_revision = builds[0].revision()
450         # An alternate way to do this would be to start at one revision and walk backwards
451         # checking builder.build_for_revision, however build_for_revision is very slow on first load.
452         while True:
453             # Make builds agree on revision
454             builds = [self._build_at_or_before_revision(build, target_revision) for build in builds]
455             if None in builds: # One of the builds failed to load from the server.
456                 return None
457             min_revision = min(map(lambda build: build.revision(), builds))
458             if min_revision != target_revision:
459                 target_revision = min_revision
460                 continue # Builds don't all agree on revision, keep searching
461             # Check to make sure they're all green
462             all_are_green = reduce(operator.and_, map(lambda build: build.is_green(), builds))
463             if not all_are_green:
464                 target_revision -= 1
465                 continue
466             return min_revision