Rename WebKitTools to Tools
[WebKit-https.git] / Tools / Scripts / webkitpy / common / system / autoinstall.py
1 # Copyright (c) 2009, Daniel Krech All rights reserved.
2 # Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #  * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #
11 #  * Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 #
15 #  * Neither the name of the Daniel Krech nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """Support for automatically downloading Python packages from an URL."""
32
33
34 from __future__ import with_statement
35
36 import codecs
37 import logging
38 import new
39 import os
40 import shutil
41 import sys
42 import tarfile
43 import tempfile
44 import urllib
45 import urlparse
46 import zipfile
47 import zipimport
48
49 _log = logging.getLogger(__name__)
50
51
52 class AutoInstaller(object):
53
54     """Supports automatically installing Python packages from an URL.
55
56     Supports uncompressed files, .tar.gz, and .zip formats.
57
58     Basic usage:
59
60     installer = AutoInstaller()
61
62     installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
63                       url_subpath="pep8-0.5.0/pep8.py")
64     installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip",
65                       url_subpath="mechanize")
66
67     """
68
69     def __init__(self, append_to_search_path=False, make_package=True,
70                  target_dir=None, temp_dir=None):
71         """Create an AutoInstaller instance, and set up the target directory.
72
73         Args:
74           append_to_search_path: A boolean value of whether to append the
75                                  target directory to the sys.path search path.
76           make_package: A boolean value of whether to make the target
77                         directory a package.  This adds an __init__.py file
78                         to the target directory -- allowing packages and
79                         modules within the target directory to be imported
80                         explicitly using dotted module names.
81           target_dir: The directory path to which packages should be installed.
82                       Defaults to a subdirectory of the folder containing
83                       this module called "autoinstalled".
84           temp_dir: The directory path to use for any temporary files
85                     generated while downloading, unzipping, and extracting
86                     packages to install.  Defaults to a standard temporary
87                     location generated by the tempfile module.  This
88                     parameter should normally be used only for development
89                     testing.
90
91         """
92         if target_dir is None:
93             this_dir = os.path.dirname(__file__)
94             target_dir = os.path.join(this_dir, "autoinstalled")
95
96         # Ensure that the target directory exists.
97         self._set_up_target_dir(target_dir, append_to_search_path, make_package)
98
99         self._target_dir = target_dir
100         self._temp_dir = temp_dir
101
102     def _log_transfer(self, message, source, target, log_method=None):
103         """Log a debug message that involves a source and target."""
104         if log_method is None:
105             log_method = _log.debug
106
107         log_method("%s" % message)
108         log_method('    From: "%s"' % source)
109         log_method('      To: "%s"' % target)
110
111     def _create_directory(self, path, name=None):
112         """Create a directory."""
113         log = _log.debug
114
115         name = name + " " if name is not None else ""
116         log('Creating %sdirectory...' % name)
117         log('    "%s"' % path)
118
119         os.makedirs(path)
120
121     def _write_file(self, path, text, encoding):
122         """Create a file at the given path with given text.
123
124         This method overwrites any existing file.
125
126         """
127         _log.debug("Creating file...")
128         _log.debug('    "%s"' % path)
129         with codecs.open(path, "w", encoding) as file:
130             file.write(text)
131
132     def _set_up_target_dir(self, target_dir, append_to_search_path,
133                            make_package):
134         """Set up a target directory.
135
136         Args:
137           target_dir: The path to the target directory to set up.
138           append_to_search_path: A boolean value of whether to append the
139                                  target directory to the sys.path search path.
140           make_package: A boolean value of whether to make the target
141                         directory a package.  This adds an __init__.py file
142                         to the target directory -- allowing packages and
143                         modules within the target directory to be imported
144                         explicitly using dotted module names.
145
146         """
147         if not os.path.exists(target_dir):
148             self._create_directory(target_dir, "autoinstall target")
149
150         if append_to_search_path:
151             sys.path.append(target_dir)
152
153         if make_package:
154             init_path = os.path.join(target_dir, "__init__.py")
155             if not os.path.exists(init_path):
156                 text = ("# This file is required for Python to search this "
157                         "directory for modules.\n")
158                 self._write_file(init_path, text, "ascii")
159
160     def _create_scratch_directory_inner(self, prefix):
161         """Create a scratch directory without exception handling.
162
163         Creates a scratch directory inside the AutoInstaller temp
164         directory self._temp_dir, or inside a platform-dependent temp
165         directory if self._temp_dir is None.  Returns the path to the
166         created scratch directory.
167
168         Raises:
169           OSError: [Errno 2] if the containing temp directory self._temp_dir
170                              is not None and does not exist.
171
172         """
173         # The tempfile.mkdtemp() method function requires that the
174         # directory corresponding to the "dir" parameter already exist
175         # if it is not None.
176         scratch_dir = tempfile.mkdtemp(prefix=prefix, dir=self._temp_dir)
177         return scratch_dir
178
179     def _create_scratch_directory(self, target_name):
180         """Create a temporary scratch directory, and return its path.
181
182         The scratch directory is generated inside the temp directory
183         of this AutoInstaller instance.  This method also creates the
184         temp directory if it does not already exist.
185
186         """
187         prefix = target_name + "_"
188         try:
189             scratch_dir = self._create_scratch_directory_inner(prefix)
190         except OSError:
191             # Handle case of containing temp directory not existing--
192             # OSError: [Errno 2] No such file or directory:...
193             temp_dir = self._temp_dir
194             if temp_dir is None or os.path.exists(temp_dir):
195                 raise
196             # Else try again after creating the temp directory.
197             self._create_directory(temp_dir, "autoinstall temp")
198             scratch_dir = self._create_scratch_directory_inner(prefix)
199
200         return scratch_dir
201
202     def _url_downloaded_path(self, target_name):
203         """Return the path to the file containing the URL downloaded."""
204         filename = ".%s.url" % target_name
205         path = os.path.join(self._target_dir, filename)
206         return path
207
208     def _is_downloaded(self, target_name, url):
209         """Return whether a package version has been downloaded."""
210         version_path = self._url_downloaded_path(target_name)
211
212         _log.debug('Checking %s URL downloaded...' % target_name)
213         _log.debug('    "%s"' % version_path)
214
215         if not os.path.exists(version_path):
216             # Then no package version has been downloaded.
217             _log.debug("No URL file found.")
218             return False
219
220         with codecs.open(version_path, "r", "utf-8") as file:
221             version = file.read()
222
223         return version.strip() == url.strip()
224
225     def _record_url_downloaded(self, target_name, url):
226         """Record the URL downloaded to a file."""
227         version_path = self._url_downloaded_path(target_name)
228         _log.debug("Recording URL downloaded...")
229         _log.debug('    URL: "%s"' % url)
230         _log.debug('     To: "%s"' % version_path)
231
232         self._write_file(version_path, url, "utf-8")
233
234     def _extract_targz(self, path, scratch_dir):
235         # tarfile.extractall() extracts to a path without the
236         # trailing ".tar.gz".
237         target_basename = os.path.basename(path[:-len(".tar.gz")])
238         target_path = os.path.join(scratch_dir, target_basename)
239
240         self._log_transfer("Starting gunzip/extract...", path, target_path)
241
242         try:
243             tar_file = tarfile.open(path)
244         except tarfile.ReadError, err:
245             # Append existing Error message to new Error.
246             message = ("Could not open tar file: %s\n"
247                        " The file probably does not have the correct format.\n"
248                        " --> Inner message: %s"
249                        % (path, err))
250             raise Exception(message)
251
252         try:
253             # This is helpful for debugging purposes.
254             _log.debug("Listing tar file contents...")
255             for name in tar_file.getnames():
256                 _log.debug('    * "%s"' % name)
257             _log.debug("Extracting gzipped tar file...")
258             tar_file.extractall(target_path)
259         finally:
260             tar_file.close()
261
262         return target_path
263
264     # This is a replacement for ZipFile.extractall(), which is
265     # available in Python 2.6 but not in earlier versions.
266     def _extract_all(self, zip_file, target_dir):
267         self._log_transfer("Extracting zip file...", zip_file, target_dir)
268
269         # This is helpful for debugging purposes.
270         _log.debug("Listing zip file contents...")
271         for name in zip_file.namelist():
272             _log.debug('    * "%s"' % name)
273
274         for name in zip_file.namelist():
275             path = os.path.join(target_dir, name)
276             self._log_transfer("Extracting...", name, path)
277
278             if not os.path.basename(path):
279                 # Then the path ends in a slash, so it is a directory.
280                 self._create_directory(path)
281                 continue
282             # Otherwise, it is a file.
283
284             try:
285                 # We open this file w/o encoding, as we're reading/writing
286                 # the raw byte-stream from the zip file.
287                 outfile = open(path, 'wb')
288             except IOError, err:
289                 # Not all zip files seem to list the directories explicitly,
290                 # so try again after creating the containing directory.
291                 _log.debug("Got IOError: retrying after creating directory...")
292                 dir = os.path.dirname(path)
293                 self._create_directory(dir)
294                 outfile = open(path, 'wb')
295
296             try:
297                 outfile.write(zip_file.read(name))
298             finally:
299                 outfile.close()
300
301     def _unzip(self, path, scratch_dir):
302         # zipfile.extractall() extracts to a path without the
303         # trailing ".zip".
304         target_basename = os.path.basename(path[:-len(".zip")])
305         target_path = os.path.join(scratch_dir, target_basename)
306
307         self._log_transfer("Starting unzip...", path, target_path)
308
309         try:
310             zip_file = zipfile.ZipFile(path, "r")
311         except zipfile.BadZipfile, err:
312             message = ("Could not open zip file: %s\n"
313                        " --> Inner message: %s"
314                        % (path, err))
315             raise Exception(message)
316
317         try:
318             self._extract_all(zip_file, scratch_dir)
319         finally:
320             zip_file.close()
321
322         return target_path
323
324     def _prepare_package(self, path, scratch_dir):
325         """Prepare a package for use, if necessary, and return the new path.
326
327         For example, this method unzips zipped files and extracts
328         tar files.
329
330         Args:
331           path: The path to the downloaded URL contents.
332           scratch_dir: The scratch directory.  Note that the scratch
333                        directory contains the file designated by the
334                        path parameter.
335
336         """
337         # FIXME: Add other natural extensions.
338         if path.endswith(".zip"):
339             new_path = self._unzip(path, scratch_dir)
340         elif path.endswith(".tar.gz"):
341             new_path = self._extract_targz(path, scratch_dir)
342         else:
343             # No preparation is needed.
344             new_path = path
345
346         return new_path
347
348     def _download_to_stream(self, url, stream):
349         """Download an URL to a stream, and return the number of bytes."""
350         try:
351             netstream = urllib.urlopen(url)
352         except IOError, err:
353             # Append existing Error message to new Error.
354             message = ('Could not download Python modules from URL "%s".\n'
355                        " Make sure you are connected to the internet.\n"
356                        " You must be connected to the internet when "
357                        "downloading needed modules for the first time.\n"
358                        " --> Inner message: %s"
359                        % (url, err))
360             raise IOError(message)
361         code = 200
362         if hasattr(netstream, "getcode"):
363             code = netstream.getcode()
364         if not 200 <= code < 300:
365             raise ValueError("HTTP Error code %s" % code)
366
367         BUFSIZE = 2**13  # 8KB
368         bytes = 0
369         while True:
370             data = netstream.read(BUFSIZE)
371             if not data:
372                 break
373             stream.write(data)
374             bytes += len(data)
375         netstream.close()
376         return bytes
377
378     def _download(self, url, scratch_dir):
379         """Download URL contents, and return the download path."""
380         url_path = urlparse.urlsplit(url)[2]
381         url_path = os.path.normpath(url_path)  # Removes trailing slash.
382         target_filename = os.path.basename(url_path)
383         target_path = os.path.join(scratch_dir, target_filename)
384
385         self._log_transfer("Starting download...", url, target_path)
386
387         with open(target_path, "wb") as stream:
388             bytes = self._download_to_stream(url, stream)
389
390         _log.debug("Downloaded %s bytes." % bytes)
391
392         return target_path
393
394     def _install(self, scratch_dir, package_name, target_path, url,
395                  url_subpath):
396         """Install a python package from an URL.
397
398         This internal method overwrites the target path if the target
399         path already exists.
400
401         """
402         path = self._download(url=url, scratch_dir=scratch_dir)
403         path = self._prepare_package(path, scratch_dir)
404
405         if url_subpath is None:
406             source_path = path
407         else:
408             source_path = os.path.join(path, url_subpath)
409
410         if os.path.exists(target_path):
411             _log.debug('Refreshing install: deleting "%s".' % target_path)
412             if os.path.isdir(target_path):
413                 shutil.rmtree(target_path)
414             else:
415                 os.remove(target_path)
416
417         self._log_transfer("Moving files into place...", source_path, target_path)
418
419         # The shutil.move() command creates intermediate directories if they
420         # do not exist, but we do not rely on this behavior since we
421         # need to create the __init__.py file anyway.
422         shutil.move(source_path, target_path)
423
424         self._record_url_downloaded(package_name, url)
425
426     def install(self, url, should_refresh=False, target_name=None,
427                 url_subpath=None):
428         """Install a python package from an URL.
429
430         Args:
431           url: The URL from which to download the package.
432
433         Optional Args:
434           should_refresh: A boolean value of whether the package should be
435                           downloaded again if the package is already present.
436           target_name: The name of the folder or file in the autoinstaller
437                        target directory at which the package should be
438                        installed.  Defaults to the base name of the
439                        URL sub-path.  This parameter must be provided if
440                        the URL sub-path is not specified.
441           url_subpath: The relative path of the URL directory that should
442                        be installed.  Defaults to the full directory, or
443                        the entire URL contents.
444
445         """
446         if target_name is None:
447             if not url_subpath:
448                 raise ValueError('The "target_name" parameter must be '
449                                  'provided if the "url_subpath" parameter '
450                                  "is not provided.")
451             # Remove any trailing slashes.
452             url_subpath = os.path.normpath(url_subpath)
453             target_name = os.path.basename(url_subpath)
454
455         target_path = os.path.join(self._target_dir, target_name)
456         if not should_refresh and self._is_downloaded(target_name, url):
457             _log.debug('URL for %s already downloaded.  Skipping...'
458                        % target_name)
459             _log.debug('    "%s"' % url)
460             return
461
462         self._log_transfer("Auto-installing package: %s" % target_name,
463                             url, target_path, log_method=_log.info)
464
465         # The scratch directory is where we will download and prepare
466         # files specific to this install until they are ready to move
467         # into place.
468         scratch_dir = self._create_scratch_directory(target_name)
469
470         try:
471             self._install(package_name=target_name,
472                           target_path=target_path,
473                           scratch_dir=scratch_dir,
474                           url=url,
475                           url_subpath=url_subpath)
476         except Exception, err:
477             # Append existing Error message to new Error.
478             message = ("Error auto-installing the %s package to:\n"
479                        ' "%s"\n'
480                        " --> Inner message: %s"
481                        % (target_name, target_path, err))
482             raise Exception(message)
483         finally:
484             _log.debug('Cleaning up: deleting "%s".' % scratch_dir)
485             shutil.rmtree(scratch_dir)
486         _log.debug('Auto-installed %s to:' % target_name)
487         _log.debug('    "%s"' % target_path)
488
489
if __name__=="__main__":

    # For development testing, send the autoinstall logger's DEBUG
    # messages to the console.
    formatter = logging.Formatter('%(name)s: %(levelname)-8s %(message)s')
    console = logging.StreamHandler()
    console.setFormatter(formatter)

    _log.setLevel(logging.DEBUG)
    _log.addHandler(console)

    # Keep temporary files next to the installed packages, where they
    # are easier to find while debugging.
    target_dir = os.path.join(os.path.dirname(__file__), "autoinstalled")
    temp_dir = os.path.join(target_dir, "Temp")

    installer = AutoInstaller(target_dir=target_dir,
                              temp_dir=temp_dir)

    # One single-file package from a .tar.gz, and one directory
    # package from a .zip.
    sample_installs = (
        dict(should_refresh=False,
             target_name="pep8.py",
             url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
             url_subpath="pep8-0.5.0/pep8.py"),
        dict(should_refresh=False,
             target_name="mechanize",
             url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.1.11.zip",
             url_subpath="mechanize"),
    )
    for install_args in sample_installs:
        installer.install(**install_args)
517