Perf-o-matic should store "values" and support array'ed input
[WebKit-https.git] / Websites / webkit-perf.appspot.com / models.py
1 #!/usr/bin/env python
2 # Copyright (C) 2012 Google Inc. All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #     * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #     * Redistributions in binary form must reproduce the above
11 # copyright notice, this list of conditions and the following disclaimer
12 # in the documentation and/or other materials provided with the
13 # distribution.
14 #     * Neither the name of Google Inc. nor the names of its
15 # contributors may be used to endorse or promote products derived from
16 # this software without specific prior written permission.
17 #
18 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 import hashlib
31 import json
32 import math
33 import re
34
35 from datetime import datetime
36 from datetime import timedelta
37 from google.appengine.ext import db
38 from google.appengine.api import memcache
39 from time import mktime
40
41
class NumericIdHolder(db.Model):
    # Dummy class whose sole purpose is to generate key().id().
    # A holder is stored first so that it receives a numeric id; that id is then
    # passed to the callback creating the real model, and `owner` is pointed at
    # the model that was created (see create_in_transaction_with_numeric_id_holder).
    owner = db.ReferenceProperty()
45
46
def create_in_transaction_with_numeric_id_holder(callback):
    """Run `callback(numeric_id)` in a transaction, backed by a fresh NumericIdHolder.

    A holder is stored (and re-fetched) first; its numeric id is passed to
    `callback`, which is expected to return the newly created model or a falsy
    value when nothing was created.

    Returns whatever `callback` returned. When that value is falsy, or when the
    transaction raises, the holder is deleted again; otherwise the holder's
    `owner` is set to the created model.
    """
    holder = NumericIdHolder()
    holder.put()
    holder = NumericIdHolder.get(holder.key())

    created = None
    try:
        created = db.run_in_transaction(callback, holder.key().id())
    finally:
        # Reached both when the callback declined to create anything and when
        # it raised: in either case the reserved id is released.
        if not created:
            holder.delete()

    if not created:
        return created

    holder.owner = created
    holder.put()
    return created
61
62
def delete_model_with_numeric_id_holder(model):
    """Delete `model` together with the NumericIdHolder that carries its numeric id.

    The holder is looked up by `model.id`. If no holder exists for that id the
    model is still deleted and the missing holder is simply skipped, instead of
    failing with an AttributeError after the model is already gone.
    """
    id_holder = NumericIdHolder.get_by_id(model.id)
    model.delete()
    if id_holder is not None:
        id_holder.delete()
67
68
def model_from_numeric_id(id, expected_kind):
    """Return the model owning numeric id `id`, provided it is an `expected_kind`.

    Returns None when there is no holder for `id`, when the holder has no owner,
    or when the owner is not an instance of `expected_kind`.
    """
    holder = NumericIdHolder.get_by_id(id)
    if not holder:
        return None

    owner = holder.owner
    if owner and isinstance(owner, expected_kind):
        return owner
    return None
72
73
def _create_if_possible(model, key, name):
    """Create a `model` entity under key name `key` unless one already exists.

    Returns the newly stored entity, or None when an entity with that key name
    is already present.
    """

    def insert_unless_present(numeric_id):
        # Runs inside the transaction started by the numeric-id helper.
        if not model.get_by_key_name(key):
            entity = model(id=numeric_id, name=name, key_name=key)
            entity.put()
            return entity
        return None

    return create_in_transaction_with_numeric_id_holder(insert_unless_present)
84
85
class Branch(db.Model):
    # Numeric id supplied by the NumericIdHolder machinery (see _create_if_possible).
    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)

    @staticmethod
    def create_if_possible(key, name):
        """Create a Branch with key name `key`; return None if that key name is already taken."""
        return _create_if_possible(Branch, key, name)
93
94
class Platform(db.Model):
    # Numeric id supplied by the NumericIdHolder machinery (see _create_if_possible).
    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)
    hidden = db.BooleanProperty()

    @staticmethod
    def create_if_possible(key, name):
        """Create a Platform with key name `key`; return None if that key name is already taken."""
        return _create_if_possible(Platform, key, name)
103
104
class Builder(db.Model):
    # NOTE(review): passwords are stored as plain (unsalted) SHA-256 hex digests.
    # Changing the scheme would invalidate stored values, so it is left as is here.
    name = db.StringProperty(required=True)
    password = db.StringProperty(required=True)

    @staticmethod
    def _hashed_password(raw_password):
        """Return the SHA-256 hex digest stored in place of `raw_password`."""
        digest = hashlib.sha256(raw_password)
        return digest.hexdigest()

    @staticmethod
    def create(name, raw_password):
        """Store a new Builder keyed by `name` and return the result of put()."""
        builder = Builder(name=name, password=Builder._hashed_password(raw_password), key_name=name)
        return builder.put()

    def update_password(self, raw_password):
        """Replace the stored digest with the one for `raw_password` and save."""
        self.password = Builder._hashed_password(raw_password)
        self.put()

    def authenticate(self, raw_password):
        """Return True when `raw_password` hashes to the stored digest."""
        expected = Builder._hashed_password(raw_password)
        return self.password == expected
123
124
class Build(db.Model):
    branch = db.ReferenceProperty(Branch, required=True, collection_name='build_branch')
    platform = db.ReferenceProperty(Platform, required=True, collection_name='build_platform')
    builder = db.ReferenceProperty(Builder, required=True, collection_name='builder_key')
    buildNumber = db.IntegerProperty(required=True)
    revision = db.IntegerProperty(required=True)
    chromiumRevision = db.IntegerProperty()
    timestamp = db.DateTimeProperty(required=True)

    @staticmethod
    def get_or_insert_from_log(log):
        """Fetch or create the Build described by report `log`.

        The key name is "<builder name>:<timestamp in whole seconds>", so a
        report from the same builder with the same timestamp maps onto the
        same Build entity.
        """
        builder = log.builder()
        epoch_seconds = int(mktime(log.timestamp().timetuple()))
        key_name = builder.name + ':' + str(epoch_seconds)

        properties = {
            'branch': log.branch(),
            'platform': log.platform(),
            'builder': builder,
            'buildNumber': log.build_number(),
            'timestamp': log.timestamp(),
            'revision': log.webkit_revision(),
            'chromiumRevision': log.chromium_revision(),
        }
        return Build.get_or_insert(key_name, **properties)
142
143
class Test(db.Model):
    # Numeric id supplied by create_in_transaction_with_numeric_id_holder.
    id = db.IntegerProperty(required=True)
    name = db.StringProperty(required=True)
    # FIXME: Storing branches and platforms separately is flawed since a test may be available on
    # one platform but only on some branch and vice versa.
    branches = db.ListProperty(db.Key)
    platforms = db.ListProperty(db.Key)
    unit = db.StringProperty()
    hidden = db.BooleanProperty()

    @staticmethod
    def update_or_insert(test_name, branch, platform, unit=None):
        """Return the Test named `test_name`, creating it or extending an existing one.

        An existing test gets `branch` and `platform` appended to its key lists
        (if not already present) and its unit overwritten with `unit`; otherwise
        a new Test is created with a fresh numeric id.
        """
        # One-element list so the nested function can hand the existing test back
        # out of the transaction (the closure cannot rebind an outer local).
        existing_test = [None]

        def execute(id):
            test = Test.get_by_key_name(test_name)
            if test:
                if branch.key() not in test.branches:
                    test.branches.append(branch.key())
                if platform.key() not in test.platforms:
                    test.platforms.append(platform.key())
                # NOTE(review): this also overwrites a stored unit with None when the
                # caller omits `unit` -- confirm that is intended.
                test.unit = unit
                test.put()
                existing_test[0] = test
                # Returning None tells the numeric-id helper that nothing new was
                # created, so it deletes the id holder it reserved.
                return None

            test = Test(id=id, name=test_name, key_name=test_name, unit=unit, branches=[branch.key()], platforms=[platform.key()])
            test.put()
            return test

        return create_in_transaction_with_numeric_id_holder(execute) or existing_test[0]

    def merge(self, other):
        """Move every TestResult of `other` under this test's name, then delete `other`.

        Returns the set of (branch id, platform id) pairs whose results were
        renamed, or None (without changing anything) when some build already has
        a result under both names.
        """
        assert self.key() != other.key()

        merged_results = TestResult.all()
        merged_results.filter('name =', other.name)

        # FIXME: We should be doing this check in a transaction but only ancestor queries are allowed
        for result in merged_results:
            if TestResult.get_by_key_name(TestResult.key_name(result.build, self.name)):
                return None

        branches_and_platforms_to_update = set()
        # Second pass over the same query: results are re-keyed one at a time.
        for result in merged_results:
            branches_and_platforms_to_update.add((result.build.branch.id, result.build.platform.id))
            result.replace_to_change_test_name(self.name)

        delete_model_with_numeric_id_holder(other)

        return branches_and_platforms_to_update
195
196
class TestResult(db.Model):
    name = db.StringProperty(required=True)
    build = db.ReferenceProperty(Build, required=True)
    # `value` is the average; the remaining statistics are optional.
    value = db.FloatProperty(required=True)
    valueMedian = db.FloatProperty()
    valueStdev = db.FloatProperty()
    valueMin = db.FloatProperty()
    valueMax = db.FloatProperty()
    # Individual measurements, when the report supplied them.
    values = db.ListProperty(float)

    @staticmethod
    def key_name(build, test_name):
        """Return the key name "<build key name>:<test name>" for a result."""
        return build.key().name() + ':' + test_name

    @classmethod
    def get_or_insert_from_parsed_json(cls, test_name, build, result):
        """Fetch or create the result of `test_name` for `build`.

        `result` is either a bare number (stored as `value`) or a dict with a
        mandatory 'avg' entry and optional 'median', 'stdev', 'min', 'max' and
        'values' entries.
        """
        key_name = cls.key_name(build, test_name)

        def _float_or_none(dictionary, key):
            value = dictionary.get(key)
            # Zero is a legitimate statistic (e.g. stdev of identical samples);
            # only missing or empty entries are stored as None.
            if value or value == 0:
                return float(value)
            return None

        if not isinstance(result, dict):
            return cls.get_or_insert(key_name, name=test_name, build=build, value=float(result))

        # ListProperty(float) accepts float instances only, so integers parsed
        # from JSON have to be converted explicitly.
        values = [float(item) for item in result.get('values') or []]

        return cls.get_or_insert(key_name, name=test_name, build=build, value=float(result['avg']),
            valueMedian=_float_or_none(result, 'median'), valueStdev=_float_or_none(result, 'stdev'),
            valueMin=_float_or_none(result, 'min'), valueMax=_float_or_none(result, 'max'),
            values=values)

    def replace_to_change_test_name(self, new_name):
        """Store a copy of this result under `new_name`, delete this one, and return the copy.

        The key name embeds the test name, so renaming requires a new entity.
        All statistics, including the individual `values`, are carried over.
        """
        clone = TestResult(key_name=TestResult.key_name(self.build, new_name), name=new_name, build=self.build,
            value=self.value, valueMedian=self.valueMedian, valueStdev=self.valueStdev,
            valueMin=self.valueMin, valueMax=self.valueMax, values=self.values)
        clone.put()
        self.delete()
        return clone
235
236
class ReportLog(db.Model):
    timestamp = db.DateTimeProperty(required=True)
    headers = db.TextProperty()
    payload = db.TextProperty()
    commit = db.BooleanProperty()

    def _parsed_payload(self):
        """Return the JSON-decoded payload, or False when it cannot be decoded.

        The decoded value is cached on the instance. A missing payload
        (TypeError) is treated the same way as malformed JSON (ValueError).
        """
        if self.__dict__.get('_parsed') is None:
            try:
                self._parsed = json.loads(self.payload)
            except (TypeError, ValueError):
                self._parsed = False
        return self._parsed

    def get_value(self, keyName):
        """Return the payload entry `keyName`, or None when it is unavailable.

        The payload may be a JSON object or a list whose first element is the
        object. Any other shape yields None rather than an AttributeError.
        """
        parsed = self._parsed_payload()
        if not parsed:
            return None
        if isinstance(parsed, list):
            parsed = parsed[0]
        if not isinstance(parsed, dict):
            return None
        return parsed.get(keyName)

    def results(self):
        """Return the 'results' entry of the payload (None when absent)."""
        return self.get_value('results')

    def results_are_well_formed(self):
        """Return True when the payload's results can be stored as TestResults.

        Requirements: an array payload has exactly one element; 'results' is a
        dict; each entry is either float-convertible or a dict that contains
        'avg', whose 'values' entry (if any) is a list of float-convertible
        items and whose other entries, except 'unit', are float-convertible.
        """

        def _is_float_convertible(value):
            try:
                float(value)
            except (TypeError, ValueError, OverflowError):
                return False
            return True

        payload = self._parsed_payload()
        if isinstance(payload, list) and len(payload) != 1:
            return False

        results = self.results()
        if not isinstance(results, dict):
            return False

        for test_result in results.values():
            if not isinstance(test_result, dict):
                if not _is_float_convertible(test_result):
                    return False
                continue

            if 'avg' not in test_result:
                return False
            for key, value in test_result.items():
                if key == 'values':
                    if not isinstance(value, list):
                        return False
                    if not all(_is_float_convertible(item) for item in value):
                        return False
                elif key != 'unit' and not _is_float_convertible(value):
                    return False

        return True

    def builder(self):
        """Return the Builder named by 'builder-name', or None."""
        return self._model_by_key_name_in_payload(Builder, 'builder-name')

    def branch(self):
        """Return the Branch whose key name is the payload's 'branch', or None."""
        return self._model_by_key_name_in_payload(Branch, 'branch')

    def platform(self):
        """Return the Platform whose key name is the payload's 'platform', or None."""
        return self._model_by_key_name_in_payload(Platform, 'platform')

    def build_number(self):
        """Return 'build-number' as an int, or None."""
        return self._integer_in_payload('build-number')

    def webkit_revision(self):
        """Return 'webkit-revision' as an int, or None."""
        return self._integer_in_payload('webkit-revision')

    def chromium_revision(self):
        """Return 'chromium-revision' as an int, or None."""
        return self._integer_in_payload('chromium-revision')

    def _model_by_key_name_in_payload(self, model, keyName):
        """Look up a `model` entity by the key name stored under `keyName`."""
        key = self.get_value(keyName)
        if not key:
            return None
        return model.get_by_key_name(key)

    def _integer_in_payload(self, keyName):
        """Return the payload entry `keyName` converted to int, or None if that fails."""
        try:
            return int(self.get_value(keyName))
        except (TypeError, ValueError, OverflowError):
            # Missing entry, non-numeric text, NaN, or infinity.
            return None

    # FIXME: We also have timestamp as a member variable.
    # NOTE(review): this definition rebinds the class attribute `timestamp`, so it
    # appears to shadow the DateTimeProperty declared above -- confirm which one
    # is intended.
    def timestamp(self):
        """Return the payload's 'timestamp' (seconds since the epoch) as a datetime, or None."""
        try:
            return datetime.fromtimestamp(self._integer_in_payload('timestamp'))
        except (TypeError, ValueError, OverflowError):
            return None
336         except ValueError:
337             return None
338
339
class PersistentCache(db.Model):
    # Datastore-backed copy of a memcache entry; the entity's key name is the cache name.
    value = db.TextProperty(required=True)

    @staticmethod
    def set_cache(name, value):
        """Store `value` under `name` in memcache and in the datastore."""
        memcache.set(name, value)
        entity = PersistentCache(key_name=name, value=value)
        entity.put()

    @staticmethod
    def get_cache(name):
        """Return the value cached under `name`, or None when there is none.

        Memcache is consulted first; on a miss the datastore copy is used and
        written back to memcache.
        """
        from_memcache = memcache.get(name)
        if not from_memcache:
            entity = PersistentCache.get_by_key_name(name)
            if entity:
                memcache.set(name, entity.value)
                return entity.value
            return None
        return from_memcache
358
359
360 class Runs(db.Model):
361     branch = db.ReferenceProperty(Branch, required=True, collection_name='runs_branch')
362     platform = db.ReferenceProperty(Platform, required=True, collection_name='runs_platform')
363     test = db.ReferenceProperty(Test, required=True, collection_name='runs_test')
364     json_runs = db.TextProperty()
365     json_averages = db.TextProperty()
366     json_min = db.FloatProperty()
367     json_max = db.FloatProperty()
368
369     @staticmethod
370     def _generate_runs(branch, platform, test_name):
371         builds = Build.all()
372         builds.filter('branch =', branch)
373         builds.filter('platform =', platform)
374
375         for build in builds:
376             results = TestResult.all()
377             results.filter('name =', test_name)
378             results.filter('build =', build)
379             for result in results:
380                 yield build, result
381         raise StopIteration
382
383     @staticmethod
384     def _entry_from_build_and_result(build, result):
385         builder_id = build.builder.key().id()
386         timestamp = mktime(build.timestamp.timetuple())
387         statistics = None
388         supplementary_revisions = None
389
390         if result.valueStdev != None and result.valueMin != None and result.valueMax != None:
391             statistics = {'stdev': result.valueStdev, 'min': result.valueMin, 'max': result.valueMax}
392
393         if build.chromiumRevision != None:
394             supplementary_revisions = {'Chromium': build.chromiumRevision}
395
396         return [result.key().id(),
397             [build.key().id(), build.buildNumber, build.revision, supplementary_revisions],
398             timestamp, result.value, 0,  # runNumber
399             [],  # annotations
400             builder_id, statistics]
401
402     @staticmethod
403     def _timestamp_and_value_from_json_entry(json_entry):
404         return json_entry[2], json_entry[3]
405
406     @staticmethod
407     def _key_name(branch_id, platform_id, test_id):
408         return 'runs:%d,%d,%d' % (test_id, branch_id, platform_id)
409
410     @classmethod
411     def update_or_insert(cls, branch, platform, test):
412         key_name = cls._key_name(branch.id, platform.id, test.id)
413         runs = Runs(key_name=key_name, branch=branch, platform=platform, test=test, json_runs='', json_averages='')
414
415         for build, result in cls._generate_runs(branch, platform, test.name):
416             runs.update_incrementally(build, result, check_duplicates_and_save=False)
417
418         runs.put()
419         memcache.set(key_name, runs.to_json())
420         return runs
421
422     def update_incrementally(self, build, result, check_duplicates_and_save=True):
423         new_entry = Runs._entry_from_build_and_result(build, result)
424
425         # Check for duplicate entries
426         if check_duplicates_and_save:
427             revision_is_in_runs = str(build.revision) in json.loads('{' + self.json_averages + '}')
428             if revision_is_in_runs and new_entry[1] in [entry[1] for entry in json.loads('[' + self.json_runs + ']')]:
429                 return
430
431         if self.json_runs:
432             self.json_runs += ','
433
434         if self.json_averages:
435             self.json_averages += ','
436
437         self.json_runs += json.dumps(new_entry)
438         # FIXME: Calculate the average. In practice, we wouldn't have more than one value for a given revision.
439         self.json_averages += '"%d": %f' % (build.revision, result.value)
440         self.json_min = min(self.json_min, result.value) if self.json_min != None else result.value
441         self.json_max = max(self.json_max, result.value)
442
443         if check_duplicates_and_save:
444             self.put()
445             memcache.set(self.key().name(), self.to_json())
446
447     @staticmethod
448     def get_by_objects(branch, platform, test):
449         return Runs.get_by_key_name(Runs._key_name(branch.id, platform.id, test.id))
450
451     @classmethod
452     def json_by_ids(cls, branch_id, platform_id, test_id):
453         key_name = cls._key_name(branch_id, platform_id, test_id)
454         runs_json = memcache.get(key_name)
455         if not runs_json:
456             runs = cls.get_by_key_name(key_name)
457             if not runs:
458                 return None
459             runs_json = runs.to_json()
460             memcache.set(key_name, runs_json)
461         return runs_json
462
463     def to_json(self):
464         # date_range is never used by common.js.
465         return '{"test_runs": [%s], "averages": {%s}, "min": %s, "max": %s, "unit": %s, "date_range": null, "stat": "ok"}' % (self.json_runs,
466             self.json_averages, str(self.json_min) if self.json_min else 'null', str(self.json_max) if self.json_max else 'null',
467             '"%s"' % self.test.unit if self.test.unit else 'null')
468
469     def chart_params(self, display_days):
470         chart_data_x = []
471         chart_data_y = []
472         timestamp_from_entry = lambda entry: Runs._timestamp_and_value_from_json_entry(entry)[0]
473         runs = sorted(json.loads('[' + self.json_runs + ']'), lambda a, b: int(timestamp_from_entry(a) - timestamp_from_entry(b)))
474         if not runs:
475             return None
476
477         end_timestamp = timestamp_from_entry(runs[-1])
478         start_timestamp = end_timestamp - display_days * 24 * 3600
479         for entry in runs:
480             timestamp, value = Runs._timestamp_and_value_from_json_entry(entry)
481             if timestamp < start_timestamp or timestamp > end_timestamp:
482                 continue
483             chart_data_x.append(timestamp)
484             chart_data_y.append(value)
485
486         dates = [datetime.fromtimestamp(end_timestamp) - timedelta(display_days / 7.0 * (7 - i)) for i in range(0, 8)]
487
488         y_max = max(chart_data_y) * 1.1
489         y_axis_label_step = int(y_max / 5 + 0.5)  # This won't work for decimal numbers
490
491         return {
492             'cht': 'lxy',  # Specify with X and Y coordinates
493             'chxt': 'x,y',  # Display both X and Y axies
494             'chxl': '0:|' + '|'.join([date.strftime('%b %d') for date in dates]),  # X-axis labels
495             'chxr': '1,0,%f,%f' % (int(y_max + 0.5), y_axis_label_step),  # Y-axis range: min=0, max, step
496             'chds': '%f,%f,%f,%f' % (start_timestamp, end_timestamp, 0, y_max),  # X, Y data range
497             'chxs': '1,676767,11.167,0,l,676767',  # Y-axis label: 1,color,font-size,centerd on tick,axis line/no ticks, tick color
498             'chs': '360x240',  # Image size: 360px by 240px
499             'chco': 'ff0000',  # Plot line color
500             'chg': '%f,20,0,0' % (100 / (len(dates) - 1)),  # X, Y grid line step sizes - max is 100.
501             'chls': '3',  # Line thickness
502             'chf': 'bg,s,eff6fd',  # Transparent background
503             'chd': 't:' + ','.join([str(x) for x in chart_data_x]) + '|' + ','.join([str(y) for y in chart_data_y]),  # X, Y data
504         }
505
506
507 class DashboardImage(db.Model):
508     image = db.BlobProperty(required=True)
509     createdAt = db.DateTimeProperty(required=True, auto_now=True)
510
511     @staticmethod
512     def create(branch_id, platform_id, test_id, display_days, image):
513         key_name = DashboardImage.key_name(branch_id, platform_id, test_id, display_days)
514         instance = DashboardImage(key_name=key_name, image=image)
515         instance.put()
516         memcache.set('dashboard-image:' + key_name, image)
517         return instance
518
519     @staticmethod
520     def get_image(branch_id, platform_id, test_id, display_days):
521         key_name = DashboardImage.key_name(branch_id, platform_id, test_id, display_days)
522         image = memcache.get('dashboard-image:' + key_name)
523         if not image:
524             instance = DashboardImage.get_by_key_name(key_name)
525             image = instance.image if instance else None
526             memcache.set('dashboard-image:' + key_name, image)
527         return image
528
529     @classmethod
530     def needs_update(cls, branch_id, platform_id, test_id, display_days, now=datetime.now()):
531         if display_days < 10:
532             return True
533         image = DashboardImage.get_by_key_name(cls.key_name(branch_id, platform_id, test_id, display_days))
534         duration = math.sqrt(display_days) / 10
535         # e.g. 13 hours for 30 days, 23 hours for 90 days, and 46 hours for 365 days
536         return not image or image.createdAt < now - timedelta(duration)
537
538     @staticmethod
539     def key_name(branch_id, platform_id, test_id, display_days):
540         return '%d:%d:%d:%d' % (branch_id, platform_id, test_id, display_days)