45ed7973acb7b2c0b7f745b6a613fced074ca792
[WebKit-https.git] / Websites / webkit-perf.appspot.com / models.py
1 #!/usr/bin/env python
2 # Copyright (C) 2012 Google Inc. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 import hashlib
31 import json
32 import math
33 import re
34
35 from datetime import datetime
36 from datetime import timedelta
37 from google.appengine.ext import db
38 from google.appengine.api import memcache
39 from time import mktime
40
41
class NumericIdHolder(db.Model):
    """Dummy entity whose sole purpose is to generate a numeric key().id().

    `owner` is a back-reference that can be pointed at the model which was
    handed the generated id.
    """

    owner = db.ReferenceProperty()
45
46
def create_in_transaction_with_numeric_id_holder(callback):
    """Run `callback(numeric_id)` in a transaction, backed by a fresh NumericIdHolder.

    A NumericIdHolder is stored first so that its key().id() can be passed to
    `callback`. When the callback returns a truthy model, that model is recorded
    as the holder's owner; when it returns a falsy value or raises, the holder
    is deleted again.

    Returns whatever `callback` returned (None if it raised; the exception
    still propagates).
    """
    holder = NumericIdHolder()
    holder.put()
    # Re-fetch the holder through its key before using it.
    holder = NumericIdHolder.get(holder.key())

    created = None
    try:
        created = db.run_in_transaction(callback, holder.key().id())
        if created:
            holder.owner = created
            holder.put()
    finally:
        # Nothing was created, so the reserved id is not needed.
        if not created:
            holder.delete()

    return created
61
62
def delete_model_with_numeric_id_holder(model):
    """Delete `model` together with the NumericIdHolder that supplied its id.

    The holder is looked up by `model.id` before the model is deleted. A
    missing holder is tolerated so that the model is still removed cleanly
    instead of failing with an AttributeError after the delete.
    """
    id_holder = NumericIdHolder.get_by_id(model.id)
    model.delete()
    if id_holder is not None:
        id_holder.delete()
67
68
def model_from_numeric_id(id, expected_kind):
    """Return the model owning the NumericIdHolder with the given id.

    Returns None when no holder exists for `id`, when the holder has no owner,
    or when the owner is not an instance of `expected_kind`.
    """
    id_holder = NumericIdHolder.get_by_id(id)
    if not id_holder:
        return None

    owner = id_holder.owner
    if owner and isinstance(owner, expected_kind):
        return owner
    return None
72
73
def _create_if_possible(model, key, name):
    """Create and store a `model` entity under key name `key` unless one exists.

    Returns the newly stored entity, or None when an entity with that key name
    is already present.
    """

    def create_unless_taken(numeric_id):
        # Returning None tells the caller that nothing was created.
        if model.get_by_key_name(key):
            return None
        instance = model(id=numeric_id, name=name, key_name=key)
        instance.put()
        return instance

    return create_in_transaction_with_numeric_id_holder(create_unless_taken)
84
85
class Branch(db.Model):
    """A branch, identified by a numeric id and a name."""

    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)

    @staticmethod
    def create_if_possible(key, name):
        """Create a Branch keyed by `key`; return None if that key already exists."""
        return _create_if_possible(model=Branch, key=key, name=name)
93
94
class Platform(db.Model):
    """A platform, identified by a numeric id and a name, optionally hidden."""

    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)
    hidden = db.BooleanProperty()

    @staticmethod
    def create_if_possible(key, name):
        """Create a Platform keyed by `key`; return None if that key already exists."""
        return _create_if_possible(model=Platform, key=key, name=name)
103
104
class Builder(db.Model):
    """A builder account; the password is stored as a SHA-256 hex digest."""

    name = db.StringProperty(required=True)
    password = db.StringProperty(required=True)

    @staticmethod
    def _hashed_password(raw_password):
        """Return the SHA-256 hex digest of `raw_password`."""
        return hashlib.sha256(raw_password).hexdigest()

    @staticmethod
    def create(name, raw_password):
        """Store a new Builder keyed by `name` and return the result of put()."""
        digest = Builder._hashed_password(raw_password)
        builder = Builder(name=name, password=digest, key_name=name)
        return builder.put()

    def update_password(self, raw_password):
        """Replace the stored digest with that of `raw_password` and save."""
        self.password = Builder._hashed_password(raw_password)
        self.put()

    def authenticate(self, raw_password):
        """Return True when `raw_password` hashes to the stored digest."""
        return Builder._hashed_password(raw_password) == self.password
123
124
class Build(db.Model):
    """A single build produced by a builder for a branch/platform pair."""

    branch = db.ReferenceProperty(Branch, required=True, collection_name='build_branch')
    platform = db.ReferenceProperty(Platform, required=True, collection_name='build_platform')
    builder = db.ReferenceProperty(Builder, required=True, collection_name='builder_key')
    buildNumber = db.IntegerProperty(required=True)
    revision = db.IntegerProperty(required=True)
    chromiumRevision = db.IntegerProperty()
    timestamp = db.DateTimeProperty(required=True)

    @staticmethod
    def get_or_insert_from_log(log):
        """Fetch or create the Build described by `log`.

        The key name is "<builder name>:<log timestamp in whole seconds>"; all
        other fields are read from the log's accessor methods.
        """
        builder = log.builder()
        seconds = int(mktime(log.timestamp().timetuple()))
        key_name = builder.name + ':' + str(seconds)

        fields = {
            'branch': log.branch(),
            'platform': log.platform(),
            'builder': builder,
            'buildNumber': log.build_number(),
            'timestamp': log.timestamp(),
            'revision': log.webkit_revision(),
            'chromiumRevision': log.chromium_revision(),
        }
        return Build.get_or_insert(key_name, **fields)
142
143
class Test(db.Model):
    """A named test, with the branches and platforms it has been seen on."""

    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)
    # FIXME: Storing branches and platforms separately is flawed since a test may be available on
    # one platform but only on some branch and vice versa.
    branches = db.ListProperty(db.Key)
    platforms = db.ListProperty(db.Key)
    unit = db.StringProperty()
    hidden = db.BooleanProperty()

    @staticmethod
    def update_or_insert(test_name, branch, platform, unit=None):
        """Return the Test named `test_name`, creating or updating it as needed.

        An existing test gains `branch` and `platform` if it lacks them and has
        its unit overwritten with `unit`. Otherwise a new test is created with
        a freshly generated numeric id.
        """
        # The transaction callback reports an already-existing test through
        # this mutable holder, since it must return None in that case.
        found = {'test': None}

        def update_existing_or_create(numeric_id):
            current = Test.get_by_key_name(test_name)
            if not current:
                created = Test(id=numeric_id, name=test_name, key_name=test_name, unit=unit,
                    branches=[branch.key()], platforms=[platform.key()])
                created.put()
                return created

            if branch.key() not in current.branches:
                current.branches.append(branch.key())
            if platform.key() not in current.platforms:
                current.platforms.append(platform.key())
            current.unit = unit
            current.put()
            found['test'] = current
            # Nothing new was created, so the reserved numeric id is unused.
            return None

        created_test = create_in_transaction_with_numeric_id_holder(update_existing_or_create)
        return created_test or found['test']

    def merge(self, other):
        """Move all results of `other` under this test's name and delete `other`.

        Returns None, without changing anything, if any result of `other`
        would collide with an existing result of this test for the same build.
        Otherwise returns the set of (branch id, platform id) pairs whose
        results were renamed.
        """
        assert self.key() != other.key()

        results_of_other = TestResult.all().filter('name =', other.name)

        # FIXME: We should be doing this check in a transaction but only ancestor queries are allowed
        has_conflict = any(
            TestResult.get_by_key_name(TestResult.key_name(result.build, self.name))
            for result in results_of_other)
        if has_conflict:
            return None

        affected = set()
        for result in results_of_other:
            build = result.build
            affected.add((build.branch.id, build.platform.id))
            result.replace_to_change_test_name(self.name)

        delete_model_with_numeric_id_holder(other)

        return affected
195
196
class TestResult(db.Model):
    """The value a test produced on one build, with optional statistics."""

    name = db.StringProperty(required=True)
    build = db.ReferenceProperty(Build, required=True)
    value = db.FloatProperty(required=True)
    valueMedian = db.FloatProperty()
    valueStdev = db.FloatProperty()
    valueMin = db.FloatProperty()
    valueMax = db.FloatProperty()

    @staticmethod
    def key_name(build, test_name):
        """Return the key name "<build key name>:<test name>"."""
        return build.key().name() + ':' + test_name

    @classmethod
    def get_or_insert_from_parsed_json(cls, test_name, build, result):
        """Fetch or create the result of `test_name` on `build`.

        `result` is either a single float-convertible value, or a dict with a
        required 'avg' entry and optional 'median', 'stdev', 'min' and 'max'.
        """
        key_name = cls.key_name(build, test_name)

        def _float_or_none(dictionary, key):
            value = dictionary.get(key)
            # Only a missing (or empty) entry maps to None; a legitimate zero,
            # e.g. a stdev or min of 0, must be kept as 0.0.
            if value is None or value == '':
                return None
            return float(value)

        if not isinstance(result, dict):
            return cls.get_or_insert(key_name, name=test_name, build=build, value=float(result))

        return cls.get_or_insert(key_name, name=test_name, build=build, value=float(result['avg']),
            valueMedian=_float_or_none(result, 'median'), valueStdev=_float_or_none(result, 'stdev'),
            valueMin=_float_or_none(result, 'min'), valueMax=_float_or_none(result, 'max'))

    def replace_to_change_test_name(self, new_name):
        """Store a copy of this result under `new_name`, delete this one, and return the copy.

        The key name embeds the test name, so the entity has to be replaced
        rather than updated in place.
        """
        clone = TestResult(key_name=TestResult.key_name(self.build, new_name), name=new_name, build=self.build,
            value=self.value, valueMedian=self.valueMedian, valueStdev=self.valueStdev,
            valueMin=self.valueMin, valueMax=self.valueMax)
        clone.put()
        self.delete()
        return clone
233
234
class ReportLog(db.Model):
    """A raw report as submitted, with accessors for fields of its JSON payload."""

    timestamp = db.DateTimeProperty(required=True)
    headers = db.TextProperty()
    payload = db.TextProperty()
    commit = db.BooleanProperty()

    def _parsed_payload(self):
        """Return the decoded JSON payload, or False if it cannot be decoded.

        The decoded value is cached on the instance as `_parsed`.
        """
        if self.__dict__.get('_parsed') is None:
            try:
                self._parsed = json.loads(self.payload)
            except (TypeError, ValueError):
                # TypeError covers a payload that is not a string (e.g. None).
                self._parsed = False
        return self._parsed

    def get_value(self, keyName):
        """Return the payload's value for `keyName`, or None.

        None is returned when the payload is invalid, is not a JSON object, or
        has no such key.
        """
        parsed = self._parsed_payload()
        # Valid JSON need not be an object; lists and scalars have no .get().
        if not isinstance(parsed, dict):
            return None
        return parsed.get(keyName)

    def results(self):
        """Return the payload's 'results' entry, or None."""
        return self.get_value('results')

    def results_are_well_formed(self):
        """Return True when 'results' maps each test to a float-convertible value.

        A test's value may also be a dict, in which case it must contain 'avg'
        and every entry other than 'unit' must be float-convertible.
        """

        def _is_float_convertible(value):
            try:
                float(value)
                return True
            except (TypeError, ValueError):
                return False

        results = self.results()
        if not isinstance(results, dict):
            return False

        for testResult in results.values():
            if isinstance(testResult, dict):
                for key, value in testResult.items():
                    if key != "unit" and not _is_float_convertible(value):
                        return False
                if 'avg' not in testResult:
                    return False
                continue
            if not _is_float_convertible(testResult):
                return False

        return True

    def builder(self):
        """Return the Builder named by 'builder-name', or None."""
        return self._model_by_key_name_in_payload(Builder, 'builder-name')

    def branch(self):
        """Return the Branch named by 'branch', or None."""
        return self._model_by_key_name_in_payload(Branch, 'branch')

    def platform(self):
        """Return the Platform named by 'platform', or None."""
        return self._model_by_key_name_in_payload(Platform, 'platform')

    def build_number(self):
        """Return 'build-number' as an int, or None."""
        return self._integer_in_payload('build-number')

    def webkit_revision(self):
        """Return 'webkit-revision' as an int, or None."""
        return self._integer_in_payload('webkit-revision')

    def chromium_revision(self):
        """Return 'chromium-revision' as an int, or None."""
        return self._integer_in_payload('chromium-revision')

    def _model_by_key_name_in_payload(self, model, keyName):
        """Look up a `model` entity whose key name is the payload value at `keyName`."""
        key = self.get_value(keyName)
        if not key:
            return None
        return model.get_by_key_name(key)

    def _integer_in_payload(self, keyName):
        """Return the payload value at `keyName` converted to int, or None if that fails."""
        try:
            return int(self.get_value(keyName))
        except (TypeError, ValueError, OverflowError):
            # OverflowError: json.loads accepts Infinity, which int() rejects.
            return None

    # FIXME: We also have timestamp as a member variable.
    def timestamp(self):
        """Return the payload's 'timestamp' as a datetime, or None if absent or invalid."""
        try:
            return datetime.fromtimestamp(self._integer_in_payload('timestamp'))
        except TypeError:
            return None
        except ValueError:
            return None
324
325
class PersistentCache(db.Model):
    """A text value kept both in memcache and in the datastore, keyed by name."""

    value = db.TextProperty(required=True)

    @staticmethod
    def set_cache(name, value):
        """Write `value` under `name` to memcache and to the datastore."""
        memcache.set(name, value)
        entry = PersistentCache(key_name=name, value=value)
        entry.put()

    @staticmethod
    def get_cache(name):
        """Return the value stored under `name`, or None if there is none.

        Memcache is consulted first; on a miss the datastore copy is used and
        written back to memcache.
        """
        cached = memcache.get(name)
        if cached:
            return cached

        entry = PersistentCache.get_by_key_name(name)
        if entry:
            memcache.set(name, entry.value)
            return entry.value
        return None
344
345
class Runs(db.Model):
    """Pre-serialized JSON of all results of one test on one branch and platform.

    `json_runs` and `json_averages` hold comma-separated JSON fragments without
    the enclosing brackets/braces, so new entries can be appended as text.
    """

    branch = db.ReferenceProperty(Branch, required=True, collection_name='runs_branch')
    platform = db.ReferenceProperty(Platform, required=True, collection_name='runs_platform')
    test = db.ReferenceProperty(Test, required=True, collection_name='runs_test')
    json_runs = db.TextProperty()
    json_averages = db.TextProperty()
    json_min = db.FloatProperty()
    json_max = db.FloatProperty()

    @staticmethod
    def _generate_runs(branch, platform, test_name):
        """Yield (build, result) for every result of `test_name` on `branch`/`platform`."""
        builds = Build.all()
        builds.filter('branch =', branch)
        builds.filter('platform =', platform)

        for build in builds:
            results = TestResult.all()
            results.filter('name =', test_name)
            results.filter('build =', build)
            for result in results:
                yield build, result
        # The generator simply ends here; an explicit `raise StopIteration`
        # is unnecessary and is an error under PEP 479.

    @staticmethod
    def _entry_from_build_and_result(build, result):
        """Return the list describing one run, as stored in `json_runs`.

        Layout: [result id, [build id, build number, revision, supplementary
        revisions], timestamp, value, run number, annotations, builder id,
        statistics].
        """
        builder_id = build.builder.key().id()
        timestamp = mktime(build.timestamp.timetuple())
        statistics = None
        supplementary_revisions = None

        # Statistics are only reported when all three values are available.
        if result.valueStdev is not None and result.valueMin is not None and result.valueMax is not None:
            statistics = {'stdev': result.valueStdev, 'min': result.valueMin, 'max': result.valueMax}

        if build.chromiumRevision is not None:
            supplementary_revisions = {'Chromium': build.chromiumRevision}

        return [result.key().id(),
            [build.key().id(), build.buildNumber, build.revision, supplementary_revisions],
            timestamp, result.value, 0,  # runNumber
            [],  # annotations
            builder_id, statistics]

    @staticmethod
    def _timestamp_and_value_from_json_entry(json_entry):
        """Return the (timestamp, value) pair of an entry built by _entry_from_build_and_result."""
        return json_entry[2], json_entry[3]

    @staticmethod
    def _key_name(branch_id, platform_id, test_id):
        """Return the key name "runs:<test id>,<branch id>,<platform id>"."""
        return 'runs:%d,%d,%d' % (test_id, branch_id, platform_id)

    @classmethod
    def update_or_insert(cls, branch, platform, test):
        """Rebuild the Runs entity from scratch, store it, and refresh memcache."""
        key_name = cls._key_name(branch.id, platform.id, test.id)
        runs = Runs(key_name=key_name, branch=branch, platform=platform, test=test, json_runs='', json_averages='')

        for build, result in cls._generate_runs(branch, platform, test.name):
            runs.update_incrementally(build, result, check_duplicates_and_save=False)

        runs.put()
        memcache.set(key_name, runs.to_json())
        return runs

    def update_incrementally(self, build, result, check_duplicates_and_save=True):
        """Append one (build, result) pair to the serialized runs.

        With `check_duplicates_and_save` set, an entry whose revision and build
        description are already present is skipped, and the entity is stored
        and memcache refreshed after appending.
        """
        new_entry = Runs._entry_from_build_and_result(build, result)

        # Check for duplicate entries
        if check_duplicates_and_save:
            revision_is_in_runs = str(build.revision) in json.loads('{' + self.json_averages + '}')
            if revision_is_in_runs and new_entry[1] in [entry[1] for entry in json.loads('[' + self.json_runs + ']')]:
                return

        if self.json_runs:
            self.json_runs += ','

        if self.json_averages:
            self.json_averages += ','

        self.json_runs += json.dumps(new_entry)
        # FIXME: Calculate the average. In practice, we wouldn't have more than one value for a given revision.
        self.json_averages += '"%d": %f' % (build.revision, result.value)
        # Handle the unset case explicitly rather than relying on how None
        # compares against numbers.
        self.json_min = min(self.json_min, result.value) if self.json_min is not None else result.value
        self.json_max = max(self.json_max, result.value) if self.json_max is not None else result.value

        if check_duplicates_and_save:
            self.put()
            memcache.set(self.key().name(), self.to_json())

    @staticmethod
    def get_by_objects(branch, platform, test):
        """Return the Runs entity for the given branch, platform and test, or None."""
        return Runs.get_by_key_name(Runs._key_name(branch.id, platform.id, test.id))

    @classmethod
    def json_by_ids(cls, branch_id, platform_id, test_id):
        """Return the JSON for the given ids from memcache or the datastore, or None."""
        key_name = cls._key_name(branch_id, platform_id, test_id)
        runs_json = memcache.get(key_name)
        if not runs_json:
            runs = cls.get_by_key_name(key_name)
            if not runs:
                return None
            runs_json = runs.to_json()
            memcache.set(key_name, runs_json)
        return runs_json

    def to_json(self):
        """Return the complete JSON document for these runs as a string."""
        # Compare against None so that a legitimate 0.0 is not reported as null.
        json_min = str(self.json_min) if self.json_min is not None else 'null'
        json_max = str(self.json_max) if self.json_max is not None else 'null'
        # json.dumps quotes and escapes the unit so the document stays valid JSON.
        unit = self.test.unit
        json_unit = json.dumps(unit) if unit else 'null'

        # date_range is never used by common.js.
        return '{"test_runs": [%s], "averages": {%s}, "min": %s, "max": %s, "unit": %s, "date_range": null, "stat": "ok"}' % (
            self.json_runs, self.json_averages, json_min, json_max, json_unit)

    def chart_params(self, display_days):
        """Return chart parameters for the last `display_days` days of runs.

        The window ends at the most recent run. Returns None when there are no
        runs at all.
        """
        chart_data_x = []
        chart_data_y = []
        timestamp_from_entry = lambda entry: Runs._timestamp_and_value_from_json_entry(entry)[0]
        runs = sorted(json.loads('[' + self.json_runs + ']'), key=timestamp_from_entry)
        if not runs:
            return None

        end_timestamp = timestamp_from_entry(runs[-1])
        start_timestamp = end_timestamp - display_days * 24 * 3600
        for entry in runs:
            timestamp, value = Runs._timestamp_and_value_from_json_entry(entry)
            if timestamp < start_timestamp or timestamp > end_timestamp:
                continue
            chart_data_x.append(timestamp)
            chart_data_y.append(value)

        # Eight evenly spaced labels from the start to the end of the window.
        dates = [datetime.fromtimestamp(end_timestamp) - timedelta(display_days / 7.0 * (7 - i)) for i in range(0, 8)]

        y_max = max(chart_data_y) * 1.1
        y_axis_label_step = int(y_max / 5 + 0.5)  # This won't work for decimal numbers

        return {
            'cht': 'lxy',  # Specify with X and Y coordinates
            'chxt': 'x,y',  # Display both X and Y axes
            'chxl': '0:|' + '|'.join([date.strftime('%b %d') for date in dates]),  # X-axis labels
            'chxr': '1,0,%f,%f' % (int(y_max + 0.5), y_axis_label_step),  # Y-axis range: min=0, max, step
            'chds': '%f,%f,%f,%f' % (start_timestamp, end_timestamp, 0, y_max),  # X, Y data range
            'chxs': '1,676767,11.167,0,l,676767',  # Y-axis label: 1,color,font-size,centered on tick,axis line/no ticks, tick color
            'chs': '360x240',  # Image size: 360px by 240px
            'chco': 'ff0000',  # Plot line color
            # Float division keeps the grid lines aligned with the X-axis labels.
            'chg': '%f,20,0,0' % (100.0 / (len(dates) - 1)),  # X, Y grid line step sizes - max is 100.
            'chls': '3',  # Line thickness
            'chf': 'bg,s,eff6fd',  # Transparent background
            'chd': 't:' + ','.join([str(x) for x in chart_data_x]) + '|' + ','.join([str(y) for y in chart_data_y]),  # X, Y data
        }
491
492
class DashboardImage(db.Model):
    """A rendered dashboard chart image, cached in the datastore and memcache."""

    image = db.BlobProperty(required=True)
    createdAt = db.DateTimeProperty(required=True, auto_now=True)

    @staticmethod
    def create(branch_id, platform_id, test_id, display_days, image):
        """Store `image` for the given ids and display range, and cache it in memcache."""
        key_name = DashboardImage.key_name(branch_id, platform_id, test_id, display_days)
        instance = DashboardImage(key_name=key_name, image=image)
        instance.put()
        memcache.set('dashboard-image:' + key_name, image)
        return instance

    @staticmethod
    def get_image(branch_id, platform_id, test_id, display_days):
        """Return the stored image from memcache or the datastore, or None."""
        key_name = DashboardImage.key_name(branch_id, platform_id, test_id, display_days)
        image = memcache.get('dashboard-image:' + key_name)
        if not image:
            instance = DashboardImage.get_by_key_name(key_name)
            image = instance.image if instance else None
            memcache.set('dashboard-image:' + key_name, image)
        return image

    @classmethod
    def needs_update(cls, branch_id, platform_id, test_id, display_days, now=None):
        """Return True when the image is missing or older than its allowed age.

        Charts covering fewer than 10 days always need an update. `now`
        defaults to the current time at the moment of the call.
        """
        if display_days < 10:
            return True
        # A default of datetime.now() in the signature would be evaluated only
        # once, at import time, freezing the clock for the process lifetime.
        if now is None:
            now = datetime.now()
        image = DashboardImage.get_by_key_name(cls.key_name(branch_id, platform_id, test_id, display_days))
        duration = math.sqrt(display_days) / 10
        # e.g. 13 hours for 30 days, 23 hours for 90 days, and 46 hours for 365 days
        return not image or image.createdAt < now - timedelta(duration)

    @staticmethod
    def key_name(branch_id, platform_id, test_id, display_days):
        """Return the key name "<branch id>:<platform id>:<test id>:<display days>"."""
        return '%d:%d:%d:%d' % (branch_id, platform_id, test_id, display_days)