1# epydoc -- Utility functions
2#
3# Copyright (C) 2005 Edward Loper
4# Author: Edward Loper <edloper@loper.org>
5# URL: <http://epydoc.sf.net>
6#
7# $Id: util.py 1671 2008-01-29 02:55:49Z edloper $
8
9"""
10Miscellaneous utility functions that are used by multiple modules.
11
12@group Python source types: is_module_file, is_package_dir, is_pyname,
13    py_src_filename
14@group Text processing: wordwrap, decode_with_backslashreplace,
15    plaintext_to_html
16"""
# Declares the markup used by the docstrings in this module
# (epytext, written in English).
__docformat__ = 'epytext en'
18
19import os, os.path, re
20
21######################################################################
22## Python Source Types
23######################################################################
24
# Filename extensions that mark a file as Python source code (used by
# is_src_filename and py_src_filename).
PY_SRC_EXTENSIONS = ['.py', '.pyw']
# Further extensions that is_module_file accepts in addition to the
# source extensions above.
PY_BIN_EXTENSIONS = ['.pyc', '.so', '.pyd']
27
def is_module_file(path):
    """
    Return true if C{path} names a Python module file.  I.e., C{path}
    is a string naming an existing file whose base name (the file name
    without its extension) looks like an identifier, and whose
    extension is listed in L{PY_SRC_EXTENSIONS} or
    L{PY_BIN_EXTENSIONS}.

    @param path: The value to check.  Anything that is not a string
        is rejected.
    @return: C{True} or C{False} (never C{None}).
    @rtype: C{bool}
    """
    # Make sure it's a file name.
    if not isinstance(path, basestring):
        return False
    filename = os.path.basename(path)
    (basename, extension) = os.path.splitext(filename)
    # bool() keeps the result a real boolean: a failed re.match would
    # otherwise leak out of the "and" chain as None.
    return bool(os.path.isfile(path) and
                re.match(r'[a-zA-Z_]\w*$', basename) and
                extension in PY_SRC_EXTENSIONS+PY_BIN_EXTENSIONS)
37
def is_src_filename(filename):
    """
    Return true if C{filename} is a string naming an existing path
    whose extension is one of L{PY_SRC_EXTENSIONS}.

    @param filename: The value to check.  Anything that is not a
        string is rejected.
    @rtype: C{bool}
    """
    if isinstance(filename, basestring) and os.path.exists(filename):
        extension = os.path.splitext(filename)[1]
        return extension in PY_SRC_EXTENSIONS
    return False
42
def is_package_dir(dirname):
    """
    Return true if the given directory is a package directory (i.e.,
    it names an existing directory that contains a valid C{__init__}
    module file, as judged by L{is_module_file}).

    The name of the directory itself is deliberately I{not} required
    to be a valid identifier: that constraint was removed because of
    sourceforge bug #1787028 -- in some cases (eg eggs), it's too
    strict.

    @param dirname: The value to check.  Anything that is not a
        string is rejected.
    @rtype: C{bool}
    """
    # Make sure it's a directory name.
    if not isinstance(dirname, basestring):
        return False
    if not os.path.isdir(dirname):
        return False
    dirname = os.path.abspath(dirname)

    # Look for an "__init__.<ext>" entry that is a real module file.
    for name in os.listdir(dirname):
        if (name.startswith('__init__.') and
            is_module_file(os.path.join(dirname, name))):
            return True
    return False
71
def is_pyname(name):
    """
    Check whether C{name} is a dotted name: one or more runs of word
    characters, separated by single periods.

    @param name: The string to check.
    @return: A regular expression match object (which is true) if
        C{name} has that form; C{None} otherwise.
    """
    dotted_name = re.compile(r"\w+(\.\w+)*$")
    return dotted_name.match(name)
74
def py_src_filename(filename):
    """
    Return the name of the Python source file that corresponds to
    C{filename}.

    If C{filename} already has one of the L{PY_SRC_EXTENSIONS}, it is
    returned unchanged (without checking that it exists).  Otherwise
    its extension is replaced by each source extension in turn, and
    the first resulting name that is an existing file is returned.

    @param filename: The name of a Python source or binary file.
    @rtype: C{string}
    @raise ValueError: If no corresponding source file exists.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix in PY_SRC_EXTENSIONS:
        return filename
    for source_ext in PY_SRC_EXTENSIONS:
        candidate = '%s%s' % (stem, source_ext)
        if os.path.isfile(candidate):
            return candidate
    raise ValueError('Could not find a corresponding '
                     'Python source file for %r.' % filename)
86
def munge_script_name(filename):
    """
    Build a name for a script from its file name: the directory part
    of C{filename} is dropped, every non-word character in what
    remains is replaced by an underscore, and the result is prefixed
    with C{'script-'}.

    @param filename: The path of the script.
    @rtype: C{string}
    """
    basename = os.path.basename(filename)
    return 'script-' + re.sub(r'\W', '_', basename)
91
92######################################################################
93## Text Processing
94######################################################################
95
def decode_with_backslashreplace(s):
    r"""
    Convert the given 8-bit string into unicode.  Any character c
    with ord(c)<128 is kept as an ascii character; any character c
    with ord(c)>=128 is replaced by a backslashed escape sequence.

        >>> decode_with_backslashreplace('abc\xff\xe8')
        u'abc\\xff\\xe8'

    @param s: The 8-bit string to convert; must be a C{str}.
    @rtype: C{unicode}
    """
    # s.encode('string-escape') is not appropriate here, since it
    # also adds backslashes to some ascii chars (eg \ and ').
    assert isinstance(s, str)
    # latin1 maps every byte to the code point with the same number,
    # so this first step cannot fail.
    as_unicode = s.decode('latin1')
    escaped = as_unicode.encode('ascii', 'backslashreplace')
    return escaped.decode('ascii')
113
def wordwrap(str, indent=0, right=75, startindex=0, splitchars=''):
    """
    Word-wrap the given string.  Tabs are expanded, and the text is
    then cut into chunks: runs of spaces, single newlines, the words
    between them and (if C{splitchars} is given) pieces that end with
    one of the split characters.  Chunks are written out one after
    another; whenever the next chunk would extend past column
    C{right}, a line break is inserted first, and a run of spaces
    that falls on such a break is dropped.  Newlines that are already
    in the string are I{not} removed.  Trailing whitespace is stripped
    from the result, which always ends with exactly one newline.

    @param indent: If specified, then indent each line by this number
        of spaces.
    @type indent: C{int}
    @param right: The right margin for word wrapping.  A chunk that
        would end beyond this column is moved to a new line, unless
        it is already at column zero.
    @type right: C{int}
    @param startindex: If specified, then assume that the first line
        is already preceded by C{startindex} characters.
    @type startindex: C{int}
    @param splitchars: A list of non-whitespace characters which can
        be used to split a line.  (E.g., use '/\\\\' to allow path names
        to be split over multiple lines.)
    @rtype: C{str}
    """
    text = str.expandtabs()
    if splitchars:
        escaped = re.escape(splitchars)
        pattern = r'( +|\n|[^ \n%s]*[%s])' % (escaped, escaped)
    else:
        pattern = r'( +|\n)'

    margin = ' ' * indent
    # The first line only needs the part of the indent that is not
    # already taken up by the startindex characters.
    pieces = [' ' * (indent - startindex)]
    column = max(indent, startindex)
    for token in re.split(pattern, text):
        overflows = column > 0 and column + len(token) > right
        if token != '\n' and not overflows:
            pieces.append(token)
            column += len(token)
            continue
        # Start a new line; the newline or run of spaces that
        # triggered the break is not carried over to it.
        pieces.append('\n' + margin)
        column = indent
        if token[:1] not in ('\n', ' '):
            pieces.append(token)
            column += len(token)
    return ''.join(pieces).rstrip() + '\n'
157
def plaintext_to_html(s):
    """
    @return: An HTML string that encodes the given plaintext string.
    The characters C{'&'}, C{'"'}, C{'<'} and C{'>'} are replaced by
    the corresponding HTML entities.
    @rtype: C{string}
    @param s: The plaintext string to encode.
    """
    # '&' has to come first, so that the ampersands of the entities
    # inserted afterwards are not escaped a second time.
    for char, entity in (('&', '&amp;'), ('"', '&quot;'),
                         ('<', '&lt;'), ('>', '&gt;')):
        s = s.replace(char, entity)
    return s
168
def plaintext_to_latex(str, nbsp=0, breakany=0):
    """
    @return: A LaTeX string that encodes the given plaintext string.
    In particular, special characters (such as C{'$'} and C{'_'})
    are escaped, and tabs are expanded.
    @rtype: C{string}
    @param str: The plaintext string to encode.
    @param breakany: Insert hyphenation marks, so that LaTeX can
    break the resulting string at any point.  This is useful for
    small boxes (e.g., the type box in the variable list table).
    @param nbsp: Replace every space with a non-breaking space
    (C{'~'}).
    """
    # Expand tabs first, while every character still takes up exactly
    # one column.  (The placeholder characters inserted below would
    # otherwise be counted as columns and shift the tab stops.)
    str = str.expandtabs()

    # Mark a break point after every character except newlines (which
    # '.' does not match).  These \1 placeholders get converted to
    # hyphenation points later.
    if breakany: str = re.sub('(.)', '\\1\1', str)

    # These get converted to \textbackslash later.  A placeholder is
    # used so that the backslashes added by the escaping steps below
    # cannot be confused with backslashes from the input.
    str = str.replace('\\', '\0')

    # These elements need to be backslashed.
    str = re.sub(r'([#$&%_{}])', r'\\\1', str)

    # These elements have special names.  (Done after the escaping
    # step, so the braces of the macros are left alone.)
    str = str.replace('|', '{\\textbar}')
    str = str.replace('<', '{\\textless}')
    str = str.replace('>', '{\\textgreater}')
    str = str.replace('^', '{\\textasciicircum}')
    str = str.replace('~', '{\\textasciitilde}')
    str = str.replace('\0', r'{\textbackslash}')

    # replace spaces with non-breaking spaces.  (Done after literal
    # tildes were converted, so the two cannot be mixed up.)
    if nbsp: str = str.replace(' ', '~')

    # Convert \1's to hyphenation points.
    if breakany: str = str.replace('\1', r'\-')

    return str
208
class RunSubprocessError(OSError):
    """
    The error raised by L{run_subprocess} when it decides that the
    command it ran has failed.  The error message is the first
    element of the command followed by C{' failed'}.

    @ivar out: The C{out} value passed to the constructor.
    @ivar err: The C{err} value passed to the constructor.
    """
    def __init__(self, cmd, out, err):
        message = '%s failed' % cmd[0]
        OSError.__init__(self, message)
        self.out, self.err = out, err
214
def run_subprocess(cmd, data=None):
    """
    Execute the command C{cmd} in a subprocess.

    Three implementations are tried in order: the C{subprocess}
    module, if it can be imported; otherwise C{popen2.Popen3}, if
    the C{popen2} module has it; otherwise C{os.popen3}.

    @param cmd: The command to execute, specified as a list
        of strings.  A single string is also accepted; it is split
        on whitespace to form the list.
    @param data: A string containing data to send to the
        subprocess.
    @return: A tuple C{(out, err)}.
    @raise OSError: If there is any problem executing the
        command, or if its exitval is not 0.  A command that ran
        but failed is reported as a L{RunSubprocessError} (a
        subclass of C{OSError}) that carries C{out} and C{err}.
        In the C{os.popen3} fallback the exit value is not
        inspected; any output on stderr is treated as failure.
    """
    # A plain str.split() is used, so quoting inside a command string
    # is not interpreted.
    if isinstance(cmd, basestring):
        cmd = cmd.split()

    # Under Python 2.4+, use subprocess
    try:
        from subprocess import Popen, PIPE
        pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() sends data (if any) and collects both output
        # streams.
        out, err = pipe.communicate(data)
        if hasattr(pipe, 'returncode'):
            if pipe.returncode == 0:
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
        else:
            # Assume that there was an error iff anything was written
            # to the child's stderr.
            if err == '':
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
    # Only a missing subprocess module falls through to the older
    # implementations below; every other exception propagates.
    except ImportError:
        pass

    # Under Python 2.3 or earlier, on unix, use popen2.Popen3 so we
    # can access the return value.
    import popen2
    if hasattr(popen2, 'Popen3'):
        # The arguments are joined with single spaces, without any
        # quoting.  The second argument (True) asks Popen3 to capture
        # the child's stderr as well.
        pipe = popen2.Popen3(' '.join(cmd), True)
        to_child = pipe.tochild
        from_child = pipe.fromchild
        child_err = pipe.childerr
        if data:
            to_child.write(data)
        # Close the child's stdin so that it sees end-of-file.
        to_child.close()
        out = err = ''
        # NOTE(review): this loop only runs while poll() returns None.
        # Confirm that Popen3.poll() really reports "still running" as
        # None (rather than -1); otherwise all output is collected by
        # the two reads after the loop.
        while pipe.poll() is None:
            out += from_child.read()
            err += child_err.read()
        # Pick up anything that is left once the loop is done.
        out += from_child.read()
        err += child_err.read()
        if pipe.wait() == 0:
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on non-unix, use os.popen3
    else:
        to_child, from_child, child_err = os.popen3(' '.join(cmd), 'b')
        if data:
            try:
                to_child.write(data)
            # Guard for a broken pipe error
            except IOError, e:
                # Re-raised as OSError, the exception type documented
                # for this function.
                raise OSError(e)
        to_child.close()
        out = from_child.read()
        err = child_err.read()
        # Assume that there was an error iff anything was written
        # to the child's stderr.
        if err == '':
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)
290