1# -*- coding: utf-8 -*-
2"""
3past.translation
4==================
5
6The ``past.translation`` package provides an import hook for Python 3 which
7transparently runs ``futurize`` fixers over Python 2 code on import to convert
8print statements into functions, etc.
9
10It is intended to assist users in migrating to Python 3.x even if some
11dependencies still only support Python 2.x.
12
13Usage
14-----
15
16Once your Py2 package is installed in the usual module search path, the import
17hook is invoked as follows:
18
19    >>> from past.translation import autotranslate
20    >>> autotranslate('mypackagename')
21
22Or:
23
24    >>> autotranslate(['mypackage1', 'mypackage2'])
25
26You can unregister the hook using::
27
28    >>> from past.translation import remove_hooks
29    >>> remove_hooks()
30
31Author: Ed Schofield.
32Inspired by and based on ``uprefix`` by Vinay M. Sajip.
33"""
34
35import imp
36import logging
37import marshal
38import os
39import sys
40import copy
41from lib2to3.pgen2.parse import ParseError
42from lib2to3.refactor import RefactoringTool
43
44from libfuturize import fixes
45
46
# Module-level logger for the import hook. It is opened up to DEBUG because
# every step of finding, translating and loading a module reports at that
# level.
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# The complete list of fixer names used for the actual translation: stage 1
# followed by stage 2, with the libfuturize fixers ahead of the lib2to3 ones
# within each stage.
myfixes = []
myfixes.extend(fixes.libfuturize_fix_names_stage1)
myfixes.extend(fixes.lib2to3_fix_names_stage1)
myfixes.extend(fixes.libfuturize_fix_names_stage2)
myfixes.extend(fixes.lib2to3_fix_names_stage2)
54
55
56# We detect whether the code is Py2 or Py3 by applying certain lib2to3 fixers
57# to it. If the diff is empty, it's Python 3 code.
58
# Fully-qualified names of the lib2to3 fixers used for Py2 detection. Each
# short name below is expanded to ``lib2to3.fixes.fix_<name>``; entries that
# are commented out are deliberately not used, for the reason given.
py2_detect_fixers = list(map('lib2to3.fixes.fix_{0}'.format, (
    # From stage 1:
    'apply',
    # 'dict',          # TODO: add support for utils.viewitems() etc. and move to stage2
    'except',
    'execfile',
    'exitfunc',
    'funcattrs',
    'filter',
    'has_key',
    'idioms',
    'import',          # makes any implicit relative imports explicit. (Use with ``from __future__ import absolute_import)
    'intern',
    'isinstance',
    'methodattrs',
    'ne',
    'numliterals',     # turns 1L into 1, 0755 into 0o755
    'paren',
    'print',
    'raise',           # uses incompatible with_traceback() method on exceptions
    'renames',
    'reduce',
    # 'set_literal',   # this is unnecessary and breaks Py2.6 support
    'repr',
    'standarderror',
    'sys_exc',
    'throw',
    'tuple_params',
    'types',
    'ws_comma',
    'xreadlines',

    # From stage 2:
    'basestring',
    # 'buffer',        # perhaps not safe. Test this.
    # 'callable',      # not needed in Py3.2+
    # 'dict',          # TODO: add support for utils.viewitems() etc.
    'exec',
    # 'future',        # we don't want to remove __future__ imports
    'getcwdu',
    # 'imports',       # called by libfuturize.fixes.fix_future_standard_library
    # 'imports2',      # we don't handle this yet (dbm)
    # 'input',
    # 'itertools',
    # 'itertools_imports',
    'long',
    # 'map',
    # 'metaclass',     # causes SyntaxError in Py2! Use the one from ``six`` instead
    'next',
    'nonzero',         # TODO: add a decorator for mapping __bool__ to __nonzero__
    # 'operator',      # we will need support for this by e.g. extending the Py2 operator module to provide those functions in Py3
    'raw_input',
    # 'unicode',       # strips off the u'' prefix, which removes a potentially helpful source of information for disambiguating unicode/byte strings
    # 'urllib',
    'xrange',
    # 'zip',
)))
116
117
class RTs:
    """
    Holder for the lib2to3 refactoring tools, which are built on first use
    rather than when this module is imported (building them at module level
    slows down the import; see issue #117).

    Each fixer set needs two tools because there are two possible grammars:
    one in which ``print`` is a statement and one in which it is a function.
    """
    # Translation tools built from ``myfixes`` (print statement / print function).
    _rt = None
    _rtp = None
    # Detection tools built from ``py2_detect_fixers`` (same two grammars).
    _rt_py2_detect = None
    _rtp_py2_detect = None

    @staticmethod
    def setup():
        """
        Create the two translation tools unless both already exist. Must be
        called before ``RTs._rt`` or ``RTs._rtp`` is used.
        """
        if RTs._rt is not None and RTs._rtp is not None:
            return
        RTs._rt = RefactoringTool(myfixes)
        RTs._rtp = RefactoringTool(myfixes, {'print_function': True})

    @staticmethod
    def setup_detect_python2():
        """
        Create the two detection tools unless both already exist. Must be
        called before ``RTs._rt_py2_detect`` or ``RTs._rtp_py2_detect`` is
        used.
        """
        if RTs._rt_py2_detect is not None and RTs._rtp_py2_detect is not None:
            return
        RTs._rt_py2_detect = RefactoringTool(py2_detect_fixers)
        RTs._rtp_py2_detect = RefactoringTool(py2_detect_fixers,
                                              {'print_function': True})
152
153
154# We need to find a prefix for the standard library, as we don't want to
155# process any files there (they will already be Python 3).
156#
# The following method is used by Vinay Sajip in uprefix. This fails for
158# ``conda`` environments:
159#     # In a non-pythonv virtualenv, sys.real_prefix points to the installed Python.
160#     # In a pythonv venv, sys.base_prefix points to the installed Python.
161#     # Outside a virtual environment, sys.prefix points to the installed Python.
162
163#     if hasattr(sys, 'real_prefix'):
164#         _syslibprefix = sys.real_prefix
165#     else:
166#         _syslibprefix = getattr(sys, 'base_prefix', sys.prefix)
167
168# Instead, we use the portion of the path common to both the stdlib modules
169# ``math`` and ``urllib``.
170
def splitall(path):
    """
    Split a path into all components. From Python Cookbook.

    ``os.path.split`` is applied repeatedly, peeling off the last component
    each time, until nothing more can be split off. For an absolute path the
    first element of the result is the root (e.g. ``'/'``).

    :param path: the path string to split.
    :returns: list of the path components, leftmost first.
    """
    allparts = []
    while True:
        head, tail = os.path.split(path)
        if head == path:  # sentinel for absolute paths
            allparts.insert(0, head)
            break
        elif tail == path:  # sentinel for relative paths
            allparts.insert(0, tail)
            break
        else:
            path = head
            allparts.insert(0, tail)
    return allparts


def common_substring(s1, s2):
    """
    Returns the longest leading run of path components shared by the two
    paths, joined back into a single path.

    The comparison is made component by component (see ``splitall``), not
    character by character, so ``'/usr/lib'`` and ``'/usr/local'`` share
    ``'/usr'`` rather than ``'/usr/l'``.

    :param s1: first path string.
    :param s2: second path string.
    :returns: the common leading path, or ``''`` if the paths do not even
        share their first component.
    """
    chunks = []
    for (dir1, dir2) in zip(splitall(s1), splitall(s2)):
        if dir1 != dir2:
            break
        chunks.append(dir1)
    if not chunks:
        # os.path.join() needs at least one argument; nothing in common.
        return ''
    return os.path.join(*chunks)
203
204# _stdlibprefix = common_substring(math.__file__, urllib.__file__)
205
206
def detect_python2(source, pathname):
    """
    Returns a bool indicating whether we think the code is Py2.

    The detection fixers are run over ``source``; if the refactored text
    differs from the input, the code is taken to be Python 2.

    :param source: the module's source code as a string.
    :param pathname: name passed to lib2to3 to identify the source; also used
        in the debug log messages.
    :returns: True if the code is judged to be Python 2, False otherwise.
    :raises ParseError: if the source cannot be parsed with either grammar.
    """
    RTs.setup_detect_python2()
    try:
        tree = RTs._rt_py2_detect.refactor_string(source, pathname)
    except ParseError as e:
        if e.msg != 'bad input' or e.value != '=':
            raise
        # Parsing choked on an '=': retry with the grammar in which print is
        # a function. This must be the *detection* tool: RTs._rtp is created
        # only by RTs.setup(), which has not necessarily run at this point,
        # and it applies the translation fixers rather than the detection
        # ones.
        tree = RTs._rtp_py2_detect.refactor_string(source, pathname)

    # NOTE(review): unlike Py2Fixer.transform(), no newline is appended to
    # ``source`` before refactoring, so the ``[:-1]`` below drops a character
    # that came from ``source`` itself. This looks like it reports any source
    # as Python 2 even when no fixer changed it -- confirm the intended
    # behaviour before relying on the False branch.
    if source != str(tree)[:-1]:   # remove added newline
        # The above fixers made changes, so we conclude it's Python 2 code
        logger.debug('Detected Python 2 code: {0}'.format(pathname))
        return True
    else:
        logger.debug('Detected Python 3 code: {0}'.format(pathname))
        return False
226
227
class Py2Fixer(object):
    """
    An import hook class that uses lib2to3 for source-to-source translation of
    Py2 code to Py3.

    The object is both finder and loader: ``find_module`` records what it
    located on the instance (``found``, ``kind``, ``pathname``) and returns
    the instance itself, and ``load_module`` then consumes that state.
    Because every ``find_module`` call overwrites those attributes, code
    that runs imports in between must work from a snapshot of them.
    """

    # See the comments on :class:future.standard_library.RenameImport.
    # We add this attribute here so remove_hooks() and install_hooks() can
    # unambiguously detect whether the import hook is installed:
    PY2FIXER = True

    def __init__(self):
        # Result of the latest imp.find_module() call:
        # (open file or None, pathname, description tuple).
        self.found = None
        self.base_exclude_paths = ['future', 'past']
        self.exclude_paths = copy.copy(self.base_exclude_paths)
        self.include_paths = []

    def include(self, paths):
        """
        Pass in a sequence of module names such as 'plotrique.plotting' that,
        if present at the leftmost side of the full package name, would
        specify the module to be transformed from Py2 to Py3.

        A single name given as a plain string is accepted too.
        """
        if isinstance(paths, str):
            # Extending a list with a string would add it one character at a
            # time.
            paths = [paths]
        self.include_paths += paths

    def exclude(self, paths):
        """
        Pass in a sequence of strings such as 'mymodule' that, if
        present at the leftmost side of the full package name, would cause
        the module not to undergo any source transformation.

        A single name given as a plain string is accepted too.
        """
        if isinstance(paths, str):
            paths = [paths]
        self.exclude_paths += paths

    def find_module(self, fullname, path=None):
        """
        Locate ``fullname`` with ``imp.find_module`` and remember the result.

        :param fullname: possibly dotted module name.
        :param path: search path for the final name component. If it is None
            and the name is dotted, the parent is found and loaded through
            this hook first and its ``__path__`` is searched.
        :returns: this object (which then acts as the loader), or None if the
            module, or its parent, cannot be found.
        """
        logger.debug('Running find_module: {0}...'.format(fullname))
        if '.' in fullname:
            parent, child = fullname.rsplit('.', 1)
            if path is None:
                loader = self.find_module(parent, path)
                if loader is None:
                    # Without the parent there is nowhere to look for the
                    # child.
                    return None
                mod = loader.load_module(parent)
                path = mod.__path__
            fullname = child

        # Perhaps we should try using the new importlib functionality in Python
        # 3.3: something like this?
        # thing = importlib.machinery.PathFinder.find_module(fullname, path)
        try:
            self.found = imp.find_module(fullname, path)
        except Exception as e:
            logger.debug('Py2Fixer could not find {0}'.format(fullname))
            logger.debug('Exception was: {0}'.format(e))
            return None
        # The description tuple is the last item; the module type is its
        # last field.
        self.kind = self.found[-1][-1]
        if self.kind == imp.PKG_DIRECTORY:
            self.pathname = os.path.join(self.found[1], '__init__.py')
        elif self.kind == imp.PY_SOURCE:
            self.pathname = self.found[1]
        else:
            # Not used for translation, but set it anyway so that a pathname
            # left over from an earlier lookup is never mistaken for this
            # module's.
            self.pathname = self.found[1]
        return self

    def transform(self, source):
        """
        Run the translation fixers over ``source`` and return the result.

        :param source: Python 2 source code as a string.
        :returns: the translated source code.
        :raises ParseError: if the source parses with neither grammar.
        """
        # This implementation uses lib2to3,
        # you can override and use something else
        # if that's better for you

        # Take the pathname before anything else runs: an import performed
        # while the refactoring tools are being set up may re-enter
        # find_module() on this object and overwrite self.pathname.
        pathname = self.pathname
        RTs.setup()
        # lib2to3 likes a newline at the end
        source += '\n'
        try:
            tree = RTs._rt.refactor_string(source, pathname)
        except ParseError as e:
            if e.msg != 'bad input' or e.value != '=':
                raise
            # Retry with the grammar in which print is a function.
            tree = RTs._rtp.refactor_string(source, pathname)
        # could optimise a bit for only doing str(tree) if
        # getattr(tree, 'was_changed', False) returns True
        return str(tree)[:-1] # remove added newline

    def load_module(self, fullname):
        """
        Load the module located by the preceding ``find_module`` call.

        A module already in ``sys.modules`` is returned as is. Otherwise the
        module is either handed to ``imp.load_module`` untouched or, if its
        name matches the include paths, translated and executed here.

        :param fullname: fully-qualified module name.
        :returns: the module object.
        """
        logger.debug('Running load_module for {0}...'.format(fullname))
        # Work from a snapshot: imports triggered while this module is being
        # translated or executed call find_module() on this same object and
        # replace self.found, so by the end it may no longer be our file.
        found = self.found
        try:
            if fullname in sys.modules:
                mod = sys.modules[fullname]
            elif not self._should_convert(fullname):
                logger.debug('Excluded {0} from translation'.format(fullname))
                mod = imp.load_module(fullname, *found)
            else:
                logger.debug('Autoconverting {0} ...'.format(fullname))
                mod = self._load_converted(fullname, found)
        finally:
            # The file opened by imp.find_module() is ours to close, whether
            # or not loading succeeded.
            if found and found[0]:
                found[0].close()
        return mod

    def _should_convert(self, fullname):
        """Return True if ``fullname`` is to be translated before loading."""
        if self.kind in (imp.PY_COMPILED, imp.C_EXTENSION, imp.C_BUILTIN,
                         imp.PY_FROZEN):
            # No Python source to translate.
            return False
        # in theory, other paths (e.g. the standard library's location)
        # could be configured to be excluded here too
        if any(fullname.startswith(path) for path in self.exclude_paths):
            # Exclusion wins over inclusion.
            return False
        return any(fullname.startswith(path) for path in self.include_paths)

    def _load_converted(self, fullname, found):
        """Create, register and execute the translated module ``fullname``."""
        # Snapshot these before any code that may import runs (see
        # load_module).
        kind = self.kind
        pathname = self.pathname

        mod = imp.new_module(fullname)
        sys.modules[fullname] = mod

        # required by PEP 302
        mod.__file__ = pathname
        mod.__name__ = fullname
        mod.__loader__ = self

        # This:
        #     mod.__package__ = '.'.join(fullname.split('.')[:-1])
        # seems to result in "SystemError: Parent module '' not loaded,
        # cannot perform relative import" for a package's __init__.py
        # file. We use the approach below. Another option to try is the
        # minimal load_module pattern from the PEP 302 text instead.
        if kind == imp.PKG_DIRECTORY:
            mod.__path__ = [os.path.dirname(pathname)]
            mod.__package__ = fullname
        else:
            # else, regular module
            mod.__path__ = []
            mod.__package__ = fullname.rpartition('.')[0]

        try:
            code = self._get_code(found, kind, pathname)
            exec(code, mod.__dict__)
        except Exception:
            # must remove module from sys.modules; pop() so that a module
            # that already removed itself cannot mask the real error
            sys.modules.pop(fullname, None)
            raise
        return mod

    def _get_code(self, found, kind, pathname):
        """Return the code object for ``pathname``, from cache if it is fresh."""
        import types   # only needed to validate what the cache file yields

        # The cache file written below holds just the marshalled code
        # object, with no header in front of it.
        # NOTE(review): this is the path imp.cache_from_source() returns,
        # presumably the same file the regular import system uses for its
        # own .pyc -- confirm that sharing it is intended.
        cachename = imp.cache_from_source(pathname)
        code = None
        if (os.path.exists(cachename) and
                os.stat(cachename).st_mtime >= os.stat(pathname).st_mtime):
            with open(cachename, 'rb') as f:
                data = f.read()
            try:
                code = marshal.loads(data)
            except Exception:
                # pyc could be corrupt. Regenerate it
                code = None
            if not isinstance(code, types.CodeType):
                # Unmarshalled fine but is not something we can execute.
                code = None
        if code is not None:
            return code

        if found[0]:
            source = found[0].read()
        elif kind == imp.PKG_DIRECTORY:
            with open(pathname) as f:
                source = f.read()
        else:
            raise ImportError('no source available for {0}'.format(pathname))

        if detect_python2(source, pathname):
            # detect_python2() may have caused imports that went through
            # find_module() on this object; transform() reads self.pathname,
            # so put ours back.
            self.pathname = pathname
            source = self.transform(source)

        code = compile(source, pathname, 'exec')

        # Writing the cache is best effort only.
        dirname = os.path.dirname(cachename)
        try:
            if not os.path.exists(dirname):
                os.makedirs(dirname)
            with open(cachename, 'wb') as f:
                f.write(marshal.dumps(code))
        except Exception as e:   # could be write-protected
            logger.debug('Could not write cache file {0}: {1}'.format(
                cachename, e))
        return code
406
# The one hook instance shared by all the functions below.
_hook = Py2Fixer()


def install_hooks(include_paths=(), exclude_paths=()):
    """
    Register module names with the import hook and install the hook.

    Calling this with no arguments is allowed: it (re)installs the hook with
    whatever include and exclude paths were registered earlier.

    :param include_paths: a module name, or a sequence of module names, to be
        translated on import.
    :param exclude_paths: a module name, or a sequence of module names, never
        to be translated.

    The hook is only put on ``sys.meta_path`` when running on Python 3 or
    later; on older versions the names are recorded but nothing is installed.
    """
    if isinstance(include_paths, str):
        include_paths = (include_paths,)
    if isinstance(exclude_paths, str):
        exclude_paths = (exclude_paths,)
    _hook.include(include_paths)
    _hook.exclude(exclude_paths)
    # _hook.debug = debug
    enable = sys.version_info[0] >= 3   # enabled for all 3.x+
    if enable and _hook not in sys.meta_path:
        sys.meta_path.insert(0, _hook)  # insert at beginning. This could be made a parameter

    # We could return the hook when there are ways of configuring it
    #return _hook
425
426
def remove_hooks():
    """
    Take the import hook off ``sys.meta_path``. Does nothing if the hook is
    not currently installed.
    """
    try:
        sys.meta_path.remove(_hook)
    except ValueError:
        # The hook was not on the list.
        pass
430
431
def detect_hooks():
    """
    Report whether the import hook is currently on ``sys.meta_path``.

    Returns True if it is installed, False if not.
    """
    # An alternative would be to look for any finder carrying the PY2FIXER
    # attribute instead of looking for this module's own hook object.
    installed = sys.meta_path.count(_hook) > 0
    return installed
439
440
class hooks(object):
    """
    Context manager that has the import hook installed for the duration of
    the ``with`` block:

    >>> from past import translation
    >>> with translation.hooks():
    ...     import mypy2module
    >>> import requests        # py2/3 compatible anyway
    >>> # etc.

    On leaving the block the hook is removed again, unless it was already
    installed when the block was entered.
    """
    def __enter__(self):
        # Remember the prior state so that __exit__ can leave an
        # already-installed hook in place.
        self.hooks_were_installed = detect_hooks()
        install_hooks()
        return self

    def __exit__(self, *exc_info):
        if self.hooks_were_installed:
            return
        remove_hooks()
459
460
class suspend_hooks(object):
    """
    Context manager that has the import hook removed for the duration of the
    ``with`` block:

    >>> from past import translation
    >>> translation.install_hooks()
    >>> import http.client
    >>> # ...
    >>> with translation.suspend_hooks():
    ...     import requests     # or others that support Py2/3

    The hook is installed again on leaving the block only if it was installed
    when the block was entered.
    """
    def __enter__(self):
        # Remember the prior state so that __exit__ restores only what was
        # there before.
        self.hooks_were_installed = detect_hooks()
        remove_hooks()
        return self

    def __exit__(self, *exc_info):
        if not self.hooks_were_installed:
            return
        install_hooks()
482
483
484# alias
485autotranslate = install_hooks
486