Unreviewed followup to r136545, manually removing the unnecessary executable bit...
[WebKit-https.git] / Tools / Scripts / webkitpy / common / system / autoinstall.py
1 # Copyright (c) 2009, Daniel Krech All rights reserved.
2 # Copyright (C) 2010 Chris Jerdonek (cjerdonek@webkit.org)
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions are
6 # met:
7 #
8 #  * Redistributions of source code must retain the above copyright
9 # notice, this list of conditions and the following disclaimer.
10 #
11 #  * Redistributions in binary form must reproduce the above copyright
12 # notice, this list of conditions and the following disclaimer in the
13 # documentation and/or other materials provided with the distribution.
14 #
15 #  * Neither the name of the Daniel Krech nor the names of its
16 # contributors may be used to endorse or promote products derived from
17 # this software without specific prior written permission.
18 #
19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 # HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 """Support for automatically downloading Python packages from an URL."""
32
33
34 import codecs
35 import logging
36 import os
37 import shutil
38 import stat
39 import sys
40 import tarfile
41 import tempfile
42 import urllib
43 import urlparse
44 import zipfile
45
46 _log = logging.getLogger(__name__)
47
48
class AutoInstaller(object):

    """Supports automatically installing Python packages from an URL.

    Supports uncompressed files, .tar.gz, and .zip formats.

    Basic usage:

    installer = AutoInstaller()

    installer.install(url="http://pypi.python.org/packages/source/p/pep8/pep8-0.5.0.tar.gz#md5=512a818af9979290cd619cce8e9c2e2b",
                      url_subpath="pep8-0.5.0/pep8.py")
    installer.install(url="http://pypi.python.org/packages/source/m/mechanize/mechanize-0.2.4.zip",
                      url_subpath="mechanize")

    """

    def __init__(self, append_to_search_path=False, make_package=True,
                 target_dir=None, temp_dir=None):
        """Create an AutoInstaller instance, and set up the target directory.

        Args:
          append_to_search_path: A boolean value of whether to append the
                                 target directory to the sys.path search path.
          make_package: A boolean value of whether to make the target
                        directory a package.  This adds an __init__.py file
                        to the target directory -- allowing packages and
                        modules within the target directory to be imported
                        explicitly using dotted module names.
          target_dir: The directory path to which packages should be installed.
                      Defaults to a subdirectory of the folder containing
                      this module called "autoinstalled".
          temp_dir: The directory path to use for any temporary files
                    generated while downloading, unzipping, and extracting
                    packages to install.  Defaults to a standard temporary
                    location generated by the tempfile module.  This
                    parameter should normally be used only for development
                    testing.

        """
        if target_dir is None:
            this_dir = os.path.dirname(__file__)
            target_dir = os.path.join(this_dir, "autoinstalled")

        # Ensure that the target directory exists.
        self._set_up_target_dir(target_dir, append_to_search_path, make_package)

        self._target_dir = target_dir
        self._temp_dir = temp_dir

    def _write_file(self, path, text, encoding):
        """Write text to the file at path using the given encoding."""
        with codecs.open(path, "w", encoding) as filehandle:
            filehandle.write(text)

    def _set_up_target_dir(self, target_dir, append_to_search_path,
                           make_package):
        """Set up a target directory.

        Args:
          target_dir: The path to the target directory to set up.
          append_to_search_path: A boolean value of whether to append the
                                 target directory to the sys.path search path.
          make_package: A boolean value of whether to make the target
                        directory a package.  This adds an __init__.py file
                        to the target directory -- allowing packages and
                        modules within the target directory to be imported
                        explicitly using dotted module names.

        """
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        # Avoid growing sys.path with duplicates when several installers
        # share one target directory.
        if append_to_search_path and target_dir not in sys.path:
            sys.path.append(target_dir)

        if make_package:
            self._make_package(target_dir)

    def _make_package(self, target_dir):
        """Add an __init__.py file to target_dir if it does not have one."""
        init_path = os.path.join(target_dir, "__init__.py")
        if not os.path.exists(init_path):
            text = ("# This file is required for Python to search this "
                    "directory for modules.\n")
            self._write_file(init_path, text, "ascii")

    def _create_scratch_directory_inner(self, prefix):
        """Create a scratch directory without exception handling.

        Creates a scratch directory inside the AutoInstaller temp
        directory self._temp_dir, or inside a platform-dependent temp
        directory if self._temp_dir is None.  Returns the path to the
        created scratch directory.

        Raises:
          OSError: [Errno 2] if the containing temp directory self._temp_dir
                             is not None and does not exist.

        """
        # The tempfile.mkdtemp() method function requires that the
        # directory corresponding to the "dir" parameter already exist
        # if it is not None.
        scratch_dir = tempfile.mkdtemp(prefix=prefix.replace('/', '.'), dir=self._temp_dir)
        return scratch_dir

    def _create_scratch_directory(self, target_name):
        """Create a temporary scratch directory, and return its path.

        The scratch directory is generated inside the temp directory
        of this AutoInstaller instance.  This method also creates the
        temp directory if it does not already exist.

        """
        prefix = target_name.replace(os.sep, "_") + "_"
        try:
            scratch_dir = self._create_scratch_directory_inner(prefix)
        except OSError:
            # Handle case of containing temp directory not existing--
            # OSError: [Errno 2] No such file or directory:...
            temp_dir = self._temp_dir
            if temp_dir is None or os.path.exists(temp_dir):
                raise
            # Else try again after creating the temp directory.
            os.makedirs(temp_dir)
            scratch_dir = self._create_scratch_directory_inner(prefix)

        return scratch_dir

    def _url_downloaded_path(self, target_name):
        """Return the path of the marker file recording the installed URL."""
        return os.path.join(self._target_dir, ".%s.url" % target_name.replace('/', '_'))

    def _is_downloaded(self, target_name, url):
        """Return whether the marker file for target_name records url."""
        version_path = self._url_downloaded_path(target_name)

        if not os.path.exists(version_path):
            return False

        with codecs.open(version_path, "r", "utf-8") as filehandle:
            return filehandle.read().strip() == url.strip()

    def _record_url_downloaded(self, target_name, url):
        """Write url to the marker file for target_name."""
        version_path = self._url_downloaded_path(target_name)
        self._write_file(version_path, url, "utf-8")

    def _safe_member_path(self, base_dir, member_name):
        """Return the extraction path for an archive member.

        Archives are downloaded from the network, so member names are
        untrusted: an absolute name or one containing ".." would otherwise
        be written outside of the extraction directory.

        Args:
          base_dir: The directory the archive is being extracted into.
          member_name: The member name as stored in the archive.

        Raises:
          Exception: if the member would be extracted outside of base_dir.

        """
        path = os.path.join(base_dir, member_name)
        real_base = os.path.realpath(base_dir)
        real_path = os.path.realpath(path)
        # os.path.join(x, "") appends exactly one trailing separator, which
        # keeps "/base-other" from being accepted as inside "/base".
        base_prefix = os.path.join(real_base, "")
        if real_path != real_base and not real_path.startswith(base_prefix):
            raise Exception("Refusing to extract archive member outside of "
                            "the target directory: %s" % member_name)
        return path

    def _extract_targz(self, path, scratch_dir):
        """Extract the .tar.gz file at path, and return the extraction path.

        The archive is extracted into a directory inside scratch_dir named
        after the archive without its trailing ".tar.gz".

        Raises:
          Exception: if the file cannot be opened as a tar file, or if a
                     member name points outside of the extraction directory.

        """
        target_basename = os.path.basename(path[:-len(".tar.gz")])
        target_path = os.path.join(scratch_dir, target_basename)

        try:
            tar_file = tarfile.open(path)
        except tarfile.ReadError as err:
            # Append existing Error message to new Error.
            message = ("Could not open tar file: %s\n"
                       " The file probably does not have the correct format.\n"
                       " --> Inner message: %s"
                       % (path, err))
            raise Exception(message)

        try:
            # NOTE: This only validates member names; link members whose
            # targets point outside of target_path are not checked here.
            for member in tar_file.getmembers():
                self._safe_member_path(target_path, member.name)
            tar_file.extractall(target_path)
        finally:
            tar_file.close()

        return target_path

    # This is a replacement for ZipFile.extractall(), which is
    # available in Python 2.6 but not in earlier versions.
    # NOTE: The version in 2.6.1 (which shipped on Snow Leopard) is broken!
    def _extract_all(self, zip_file, target_dir):
        """Extract every member of an open ZipFile into target_dir.

        Raises:
          Exception: if a member name points outside of target_dir.

        """
        for name in zip_file.namelist():
            path = self._safe_member_path(target_dir, name)
            if not os.path.basename(path):
                # Then the path ends in a slash, so it is a directory.
                # It may already exist if one of its files was listed first.
                if not os.path.isdir(path):
                    os.makedirs(path)
                continue

            # Not all zip files seem to list the directories explicitly,
            # so create the containing directory when it is missing.
            dirname = os.path.dirname(path)
            if dirname and not os.path.isdir(dirname):
                os.makedirs(dirname)

            # We open this file w/o encoding, as we're reading/writing
            # the raw byte-stream from the zip file.
            with open(path, 'wb') as outfile:
                outfile.write(zip_file.read(name))

    def _unzip(self, path, scratch_dir):
        """Unzip the .zip file at path into scratch_dir.

        Returns the path inside scratch_dir named after the archive without
        its trailing ".zip".  The members are extracted directly into
        scratch_dir, so the returned path exists only if the archive
        contains a top-level entry with that name.

        Raises:
          Exception: if the file cannot be opened as a zip file, or if a
                     member name points outside of scratch_dir.

        """
        target_basename = os.path.basename(path[:-len(".zip")])
        target_path = os.path.join(scratch_dir, target_basename)

        try:
            zip_file = zipfile.ZipFile(path, "r")
        except zipfile.BadZipfile as err:
            message = ("Could not open zip file: %s\n"
                       " --> Inner message: %s"
                       % (path, err))
            raise Exception(message)

        try:
            self._extract_all(zip_file, scratch_dir)
        finally:
            zip_file.close()

        return target_path

    def _prepare_package(self, path, scratch_dir):
        """Prepare a package for use, if necessary, and return the new path.

        For example, this method unzips zipped files and extracts
        tar files.

        Args:
          path: The path to the downloaded URL contents.
          scratch_dir: The scratch directory.  Note that the scratch
                       directory contains the file designated by the
                       path parameter.

        """
        # FIXME: Add other natural extensions.
        if path.endswith(".zip"):
            new_path = self._unzip(path, scratch_dir)
        elif path.endswith(".tar.gz"):
            new_path = self._extract_targz(path, scratch_dir)
        else:
            # No preparation is needed.
            new_path = path

        return new_path

    def _download_to_stream(self, url, stream):
        """Copy the contents of url into the writable object stream.

        Raises:
          IOError: if the URL cannot be opened.
          ValueError: if the server answers with a non-2xx HTTP status.

        """
        try:
            netstream = urllib.urlopen(url)
        except IOError as err:
            # Append existing Error message to new Error.
            message = ('Could not download Python modules from URL "%s".\n'
                       " Make sure you are connected to the internet.\n"
                       " You must be connected to the internet when "
                       "downloading needed modules for the first time.\n"
                       " --> Inner message: %s"
                       % (url, err))
            raise IOError(message)

        # Close the connection even when the status check or the copy fails.
        try:
            code = 200
            if hasattr(netstream, "getcode"):
                code = netstream.getcode()
            # getcode() returns None for non-HTTP URLs (e.g. file:), which
            # have no status to validate.
            if code is not None and not 200 <= code < 300:
                raise ValueError("HTTP Error code %s" % code)

            BUFSIZE = 2**13  # 8KB
            while True:
                data = netstream.read(BUFSIZE)
                if not data:
                    break
                stream.write(data)
        finally:
            netstream.close()

    def _download(self, url, scratch_dir):
        """Download url into scratch_dir, and return the downloaded path.

        The file is named after the last component of the URL path.

        """
        url_path = urlparse.urlsplit(url)[2]
        url_path = os.path.normpath(url_path)  # Removes trailing slash.
        target_filename = os.path.basename(url_path)
        target_path = os.path.join(scratch_dir, target_filename)

        with open(target_path, "wb") as stream:
            self._download_to_stream(url, stream)

        return target_path

    def _install(self, scratch_dir, package_name, target_path, url, url_subpath,
                 files_to_remove, target_name=None):
        """Install a python package from an URL.

        This internal method overwrites the target path if the target
        path already exists.

        Args:
          scratch_dir: The directory to download and unpack into.
          package_name: The dotted name of the package being installed.
          target_path: The final path of the installed file or directory.
          url: The URL from which to download the package.
          url_subpath: The path inside the prepared download to install,
                       or None to install the whole download.
          files_to_remove: Paths relative to the installed source, using
                           "/" as separator, to delete before installing.
          target_name: The name under which to record the download, which
                       must be the name later passed to _is_downloaded().
                       Defaults to package_name.

        """
        path = self._download(url=url, scratch_dir=scratch_dir)
        path = self._prepare_package(path, scratch_dir)

        if url_subpath is None:
            source_path = path
        else:
            source_path = os.path.join(path, url_subpath)

        for filename in files_to_remove:
            unwanted_path = os.path.join(source_path, filename.replace('/', os.sep))
            if os.path.exists(unwanted_path):
                # Pre-emptively change the permissions to #0777 to try and work around win32 permissions issues.
                os.chmod(unwanted_path, stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO)
                os.remove(unwanted_path)

        if os.path.exists(target_path):
            if os.path.isdir(target_path):
                shutil.rmtree(target_path, ignore_errors=True)
            else:
                os.remove(target_path)

        # shutil.move() only creates missing parent directories when it
        # falls back to copying a directory tree, so create them up front
        # to also support single-file targets such as "pkg/module.py".
        target_parent = os.path.dirname(target_path)
        if target_parent and not os.path.isdir(target_parent):
            os.makedirs(target_parent)

        shutil.move(source_path, target_path)

        # ensure all the new directories are importable.
        intermediate_dirs = os.path.dirname(os.path.relpath(target_path, self._target_dir))
        parent_dirname = self._target_dir
        for dirname in intermediate_dirs.split(os.sep):
            if not dirname:
                # There are no intermediate directories.  Skipping the empty
                # component leaves the target directory itself alone, so
                # make_package=False is honored.
                continue
            parent_dirname = os.path.join(parent_dirname, dirname)
            self._make_package(parent_dirname)

        # Record under the same name that install() checks; the dotted
        # package name maps to a different marker file for nested targets.
        recorded_name = package_name if target_name is None else target_name
        self._record_url_downloaded(recorded_name, url)

    def install(self, url, should_refresh=False, target_name=None,
                url_subpath=None, files_to_remove=None):
        """Install a python package from an URL.

        Args:
          url: The URL from which to download the package.

        Optional Args:
          should_refresh: A boolean value of whether the package should be
                          downloaded again if the package is already present.
          target_name: The name of the folder or file in the autoinstaller
                       target directory at which the package should be
                       installed.  Defaults to the base name of the
                       URL sub-path.  This parameter must be provided if
                       the URL sub-path is not specified.
          url_subpath: The relative path of the URL directory that should
                       be installed.  Defaults to the full directory, or
                       the entire URL contents.
          files_to_remove: A list of paths, relative to the installed
                           sub-path and using "/" as separator, to delete
                           from the download before it is installed.

        Returns:
          True if the package was installed, or False if the same URL was
          already installed and should_refresh is false.

        Raises:
          ValueError: if neither target_name nor url_subpath is provided.
          Exception: if the download or the installation fails.

        """
        if target_name is None:
            if not url_subpath:
                raise ValueError('The "target_name" parameter must be '
                                 'provided if the "url_subpath" parameter '
                                 "is not provided.")
            # Remove any trailing slashes.
            url_subpath = os.path.normpath(url_subpath)
            target_name = os.path.basename(url_subpath)

        target_path = os.path.join(self._target_dir, target_name.replace('/', os.sep))
        if not should_refresh and self._is_downloaded(target_name, url):
            return False

        files_to_remove = files_to_remove or []
        package_name = target_name.replace(os.sep, '.')
        _log.info("Auto-installing package: %s", package_name)

        # The scratch directory is where we will download and prepare
        # files specific to this install until they are ready to move
        # into place.
        scratch_dir = self._create_scratch_directory(target_name)

        try:
            self._install(package_name=package_name,
                          target_path=target_path,
                          scratch_dir=scratch_dir,
                          url=url,
                          url_subpath=url_subpath,
                          files_to_remove=files_to_remove,
                          target_name=target_name)
        except Exception as err:
            # Append existing Error message to new Error.
            message = ("Error auto-installing the %s package to:\n"
                       ' "%s"\n'
                       " --> Inner message: %s"
                       % (target_name, target_path, err))
            raise Exception(message)
        finally:
            shutil.rmtree(scratch_dir, ignore_errors=True)
        _log.debug('Auto-installed %s to:', url)
        _log.debug('    "%s"', target_path)
        return True