Rewrite 'pull-os-versions' script in Javascript to add support for reporting os revis...
[WebKit.git] / Websites / perf.webkit.org / tools / sync-commits.py
1 #!/usr/bin/python
2
3 import argparse
4 import json
5 import os.path
6 import re
7 import subprocess
8 import sys
9 import time
10 import urllib2
11
12 from datetime import datetime
13 from abc import ABCMeta, abstractmethod
14 from xml.dom.minidom import parseString as parseXmlString
15 from util import load_server_config
16 from util import submit_commits
17 from util import text_content
18
19
def main(argv):
    """Run the commit-syncing loop until the process is killed.

    argv is the full argument vector with the program name first (as in
    sys.argv).  The repository list is loaded once at startup; the server
    configuration is reloaded at the start of every iteration.  Each iteration
    asks every repository to fetch and submit its new commits, then sleeps for
    --seconds-to-sleep seconds.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--repository-config-json', required=True, help='The path to a JSON file that specifies subversion syncing options')
    parser.add_argument('--server-config-json', required=True, help='The path to a JSON file that specifies the perf dashboard')
    parser.add_argument('--seconds-to-sleep', type=float, default=900, help='The seconds to sleep between iterations')
    parser.add_argument('--max-fetch-count', type=int, default=10, help='The number of commits to fetch at once')
    parser.add_argument('--max-ancestor-fetch-count', type=int, default=100, help='The number of commits to fetch at once if some commits are missing previous commits')
    # Parse the vector we were given rather than implicitly re-reading sys.argv;
    # argv[0] is the program name, which argparse does not expect.
    args = parser.parse_args(argv[1:])

    with open(args.repository_config_json) as repository_config_json:
        repositories = [load_repository(repository_info) for repository_info in json.load(repository_config_json)]

    while True:
        server_config = load_server_config(args.server_config_json)
        for repository in repositories:
            try:
                repository.fetch_commits_and_submit(server_config, args.max_fetch_count, args.max_ancestor_fetch_count)
            except Exception as error:
                # Top-level boundary: a failure in one repository must not stop
                # the others (or the loop), so report it and move on.
                print("Failed to fetch and sync: %s" % (error,))

        print("Sleeping for %d seconds..." % args.seconds_to_sleep)
        time.sleep(args.seconds_to_sleep)
42
43
def load_repository(repository):
    """Create the repository object described by one entry of the repository config.

    An entry that has a 'gitCheckout' key becomes a GitRepository; every other
    entry becomes an SVNRepository, with its optional settings defaulting to
    False (or None for the account name finder script).
    """
    if 'gitCheckout' not in repository:
        return SVNRepository(
            name=repository['name'],
            svn_url=repository['url'],
            should_trust_certificate=repository.get('trustCertificate', False),
            use_server_auth=repository.get('useServerAuth', False),
            account_name_script_path=repository.get('accountNameFinderScript'))

    return GitRepository(
        name=repository['name'],
        git_url=repository['url'],
        git_checkout=repository['gitCheckout'])
49
50
class Repository(object):
    """Base class for a source repository whose commits are reported to the dashboard.

    Subclasses supply fetch_next_commit, fetch_commit and format_revision;
    this class drives the fetch-then-submit cycle and queries the dashboard
    for revisions it already knows about.
    """

    # NOTE(review): this attribute is spelled with three underscores, so Python
    # does not treat it as a metaclass declaration and @abstractmethod is not
    # enforced.  It is left as-is on purpose: correcting the spelling would make
    # any subclass that does not override every abstract method impossible to
    # instantiate (SVNRepository does not define fetch_commit).
    ___metaclass___ = ABCMeta

    # Matches '<name> <account>' compounds such as '"Jane Doe" <jane@example.com>'.
    _name_account_compound_regex = re.compile(r'^\s*(?P<name>(\".+\"|[^<]+?))\s*\<(?P<account>.+)\>\s*$')

    def __init__(self, name):
        self._name = name
        # Revision of the newest commit known to have been submitted; None until determined.
        self._last_fetched = None

    def fetch_commits_and_submit(self, server_config, max_fetch_count, max_ancestor_fetch_count):
        """Fetch up to max_fetch_count new commits and submit them to the dashboard.

        When the dashboard answers 'FailedToFindPreviousCommit', the missing
        ancestor is fetched, prepended to the batch, and the batch is
        resubmitted, for at most max_ancestor_fetch_count submissions.

        self._last_fetched only advances once the batch has been accepted, so
        a failure while fetching or submitting leaves the same commits to be
        picked up again by the next call instead of being skipped.

        Raises Exception when an ancestor cannot be found or when the dashboard
        never reports 'OK'; exceptions from fetching and submitting propagate.
        """
        if not self._last_fetched:
            print("Determining the starting revision for %s" % self._name)
            self._last_fetched = self.determine_last_reported_revision(server_config)

        # Walk forward using a local cursor; it is committed to self._last_fetched
        # only after a successful submission.
        last_fetched = self._last_fetched
        pending_commits = []
        for unused in range(max_fetch_count):
            commit = self.fetch_next_commit(server_config, last_fetched)
            if not commit:
                break
            pending_commits.append(commit)
            last_fetched = commit['revision']

        if not pending_commits:
            print("No new revision found for %s (last fetched: %s)" % (self._name, self.format_revision(self._last_fetched)))
            return

        # Pre-bind so the status check below is well defined even when
        # max_ancestor_fetch_count is 0 and the loop body never runs.
        result = {}
        for unused in range(max_ancestor_fetch_count):
            revision_list = ', '.join([self.format_revision(commit['revision']) for commit in pending_commits])
            print("Submitting revisions %s for %s to %s" % (revision_list, self._name, server_config['server']['url']))

            result = submit_commits(pending_commits, server_config['server']['url'],
                server_config['slave']['name'], server_config['slave']['password'], ['OK', 'FailedToFindPreviousCommit'])

            status = result.get('status')
            if status == 'OK':
                break

            if status == 'FailedToFindPreviousCommit':
                missing_revision = result['commit']['previousCommit']
                previous_commit = self.fetch_commit(server_config, missing_revision)
                if not previous_commit:
                    raise Exception('Could not find the previous commit %s of %s' % (missing_revision, result['commit']['revision']))
                pending_commits = [previous_commit] + pending_commits

        if result.get('status') != 'OK':
            raise Exception(result)

        self._last_fetched = last_fetched

        print("Successfully submitted.")
        print("")

    @abstractmethod
    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit that follows last_fetched, or a falsy value when there is none."""
        pass

    @abstractmethod
    def fetch_commit(self, server_config, last_fetched):
        """Return the commit for the given revision, or a falsy value when it cannot be found."""
        pass

    @abstractmethod
    def format_revision(self, revision):
        """Return a short printable form of revision for log output."""
        pass

    def determine_last_reported_revision(self, server_config):
        """Return the dashboard's 'last-reported' revision for this repository, or None."""
        return self.fetch_revision_from_dasbhoard(server_config, 'last-reported') or None

    def fetch_revision_from_dasbhoard(self, server_config, filter):
        """Query <server url>/api/commits/<name>/<filter> and return the first commit's revision.

        Returns None when the response lists no commits (including the
        'RepositoryNotFound' status).  Raises Exception carrying the raw
        response body for any status other than 'OK' or 'RepositoryNotFound'.
        """
        url = server_config['server']['url'] + '/api/commits/' + self._name + '/' + filter
        response = urllib2.urlopen(url)
        try:
            result = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        parsed_result = json.loads(result)
        if parsed_result['status'] not in ('OK', 'RepositoryNotFound'):
            raise Exception(result)
        commits = parsed_result.get('commits')
        return commits[0]['revision'] if commits else None
123
124
class SVNRepository(Repository):
    """A Subversion repository read one revision at a time with `svn log --xml`."""

    def __init__(self, name, svn_url, should_trust_certificate, use_server_auth, account_name_script_path):
        """Store the SVN settings.

        account_name_script_path, when given, must be a list: it is used as the
        leading arguments of the command run to resolve an account to a name.
        """
        assert not account_name_script_path or isinstance(account_name_script_path, list)
        super(SVNRepository, self).__init__(name)
        self._svn_url = svn_url
        self._should_trust_certificate = should_trust_certificate
        self._use_server_auth = use_server_auth
        self._account_name_script_path = account_name_script_path

    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit for revision last_fetched + 1, or None if it does not exist yet.

        When last_fetched is falsy, the dashboard's 'oldest' revision is used
        as the starting point.  Raises Exception when no starting revision can
        be determined, and re-raises subprocess.CalledProcessError for any svn
        failure other than 'No such revision'.
        """
        if not last_fetched:
            # FIXME: This is a problematic if dashboard can get results for revisions older than oldest_revision
            # in the future because we never refetch older revisions.
            last_fetched = self.fetch_revision_from_dasbhoard(server_config, 'oldest')
            if not last_fetched:
                # Without this check int(None) below fails with an opaque TypeError.
                raise Exception('Could not determine the oldest revision of %s reported to the dashboard' % self._name)

        revision_to_fetch = int(last_fetched) + 1

        args = ['svn', 'log', '--revision', str(revision_to_fetch), '--xml', self._svn_url, '--non-interactive']
        if self._use_server_auth and 'auth' in server_config['server']:
            server_auth = server_config['server']['auth']
            # NOTE(review): the password is passed as a command-line argument to svn.
            args += ['--no-auth-cache', '--username', server_auth['username'], '--password', server_auth['password']]
        if self._should_trust_certificate:
            args += ['--trust-server-cert']

        try:
            output = subprocess.check_output(args, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as error:
            # svn reports a revision that has not been committed yet as an error;
            # that simply means there is nothing new to fetch.
            if (': No such revision ' + str(revision_to_fetch)) in error.output:
                return None
            raise

        xml = parseXmlString(output)
        commit_time = text_content(xml.getElementsByTagName("date")[0])
        author_elements = xml.getElementsByTagName("author")
        author_account = text_content(author_elements[0]) if author_elements.length else None
        message = text_content(xml.getElementsByTagName("msg")[0])

        result = {
            'repository': self._name,
            'revision': revision_to_fetch,
            'time': commit_time,
            'message': message,
        }

        if author_account:
            # The display name is only looked up when a finder script is configured.
            name = self._resolve_author_name(author_account) if self._account_name_script_path else None
            result['author'] = {'account': author_account, 'name': name}

        return result

    def _resolve_author_name(self, account):
        """Run the account name finder script for account and return the name, or None on failure.

        Output of the form 'name <account>' yields just the name with any
        surrounding double quotes removed; other output is returned stripped.
        """
        try:
            output = subprocess.check_output(self._account_name_script_path + [account])
        except subprocess.CalledProcessError:
            print('Failed to resolve the name for account: %s' % (account,))
            return None

        match = Repository._name_account_compound_regex.match(output)
        if match:
            return match.group('name').strip('"')
        return output.strip()

    def format_revision(self, revision):
        """Return the revision in the conventional 'r<number>' form."""
        return 'r' + str(revision)
191
192
class GitRepository(Repository):
    """A Git repository read from a local checkout via the git command line."""

    def __init__(self, name, git_checkout, git_url):
        """Store the checkout directory (which must already exist) and the URL to pull from."""
        assert(os.path.isdir(git_checkout))
        super(GitRepository, self).__init__(name)
        self._git_checkout = git_checkout
        self._git_url = git_url
        # One [hash, committer time, committer email, parent hashes...] list per
        # commit, in the order printed by `git log --reverse`.
        self._tokenized_hashes = []

    def fetch_next_commit(self, server_config, last_fetched):
        """Return the commit listed after last_fetched, or None when there is none.

        With a falsy last_fetched the first commit of the refreshed log is
        returned.  Otherwise the cached log is consulted first, and the
        repository is pulled and re-listed only if the cache has no successor.
        """
        if not last_fetched:
            self._fetch_all_hashes()
            if not self._tokenized_hashes:
                # The log listed no commits at all; there is nothing to report.
                return None
            tokens = self._tokenized_hashes[0]
        else:
            tokens = self._find_next_hash(last_fetched)
            if not tokens:
                self._fetch_all_hashes()
                tokens = self._find_next_hash(last_fetched)
                if not tokens:
                    return None
        return self._revision_from_tokens(tokens)

    def fetch_commit(self, server_config, hash_to_find):
        """Return the commit whose hash is hash_to_find from the cached log, or None."""
        assert(self._tokenized_hashes)
        for tokens in self._tokenized_hashes:
            if tokens and tokens[0] == hash_to_find:
                return self._revision_from_tokens(tokens)
        return None

    def _revision_from_tokens(self, tokens):
        """Build the commit dictionary to submit from one tokenized log line."""
        current_hash = tokens[0]
        commit_time = int(tokens[1])
        author_email = tokens[2]
        # Only the first listed parent is reported; a line with no parent has just three tokens.
        previous_hash = tokens[3] if len(tokens) >= 4 else None

        # NOTE(review): both values are raw git output, so they presumably keep
        # git's trailing newline - confirm whether the dashboard trims them.
        author_name = self._run_git_command(['log', current_hash, '-1', '--pretty=%cn'])
        message = self._run_git_command(['log', current_hash, '-1', '--pretty=%B'])

        return {
            'repository': self._name,
            'revision': current_hash,
            'previousCommit': previous_hash,
            # %ct is seconds since the epoch; format it as UTC so the reported
            # time does not depend on the timezone of the machine running this script.
            'time': datetime.utcfromtimestamp(commit_time).strftime(r'%Y-%m-%dT%H:%M:%S.%f'),
            'author': {'account': author_email, 'name': author_name},
            'message': message,
        }

    def _find_next_hash(self, hash_to_find):
        """Return the tokens of the entry after hash_to_find in the cached log, or None."""
        for index, tokens in enumerate(self._tokenized_hashes):
            if tokens and tokens[0] == hash_to_find:
                following = index + 1
                if following < len(self._tokenized_hashes):
                    return self._tokenized_hashes[following]
                return None
        return None

    def _fetch_all_hashes(self):
        """Pull from the configured URL and rebuild the cached, tokenized commit log."""
        self._run_git_command(['pull', self._git_url])
        output = self._run_git_command(['log', '--all', '--date-order', '--reverse', '--pretty=%H %ct %ce %P'])
        # Skip blank lines (the output ends with a newline) so every cached
        # entry is a real commit.
        self._tokenized_hashes = [line.split() for line in output.split('\n') if line.strip()]

    def _run_git_command(self, args):
        """Run git with args inside the checkout and return its combined stdout/stderr output."""
        return subprocess.check_output(['git', '-C', self._git_checkout] + args, stderr=subprocess.STDOUT)

    def format_revision(self, revision):
        """Return the first eight characters of the hash."""
        return str(revision)[0:8]
256
257
# Script entry point: run the sync loop only when this file is executed directly.
if __name__ == '__main__':
    main(sys.argv)