1"""
2Requirements file parsing
3"""
4
5from __future__ import absolute_import
6
7import optparse
8import os
9import re
10import shlex
11import sys
12
13from pip._vendor.six.moves import filterfalse
14from pip._vendor.six.moves.urllib import parse as urllib_parse
15
16from pip._internal.cli import cmdoptions
17from pip._internal.download import get_file_content
18from pip._internal.exceptions import RequirementsFileParseError
19from pip._internal.models.search_scope import SearchScope
20from pip._internal.req.constructors import (
21    install_req_from_editable, install_req_from_line,
22)
23from pip._internal.utils.typing import MYPY_CHECK_RUNNING
24
25if MYPY_CHECK_RUNNING:
26    from typing import (
27        Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple,
28    )
29    from pip._internal.req import InstallRequirement
30    from pip._internal.cache import WheelCache
31    from pip._internal.index import PackageFinder
32    from pip._internal.download import PipSession
33
34    ReqFileLines = Iterator[Tuple[int, Text]]
35
# Only parse_requirements is exported by `from ... import *`.
__all__ = ['parse_requirements']

# Matches a leading 'http:', 'https:' or 'file:' scheme, case-insensitively.
# process_line() uses it to decide whether a (nested) requirements location
# is a URL or a local path.
SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
# Matches a '#' comment that either starts the string or is preceded by
# whitespace, together with everything up to the end of the string.
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Matches environment variable-style values in '${MY_VARIABLE_1}' with the
# variable name consisting of only uppercase letters, digits or the '_'
# (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
# 2013 Edition.
# Group 'var' captures the whole '${NAME}' placeholder, group 'name' only the
# variable name inside the braces.
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')

# Option factories (each call returns a fresh optparse.Option) that
# build_parser() registers on the per-line parser. Per the process_line()
# docstring these only take effect on lines without a requirement.
SUPPORTED_OPTIONS = [
    cmdoptions.constraints,
    cmdoptions.editable,
    cmdoptions.requirements,
    cmdoptions.no_index,
    cmdoptions.index_url,
    cmdoptions.find_links,
    cmdoptions.extra_index_url,
    cmdoptions.always_unzip,
    cmdoptions.no_binary,
    cmdoptions.only_binary,
    cmdoptions.pre,
    cmdoptions.trusted_host,
    cmdoptions.require_hashes,
]  # type: List[Callable[..., optparse.Option]]

# options to be passed to requirements
# (also registered by build_parser(); process_line() forwards their parsed
# values to the requirement found on the same line)
SUPPORTED_OPTIONS_REQ = [
    cmdoptions.install_options,
    cmdoptions.global_options,
    cmdoptions.hash,
]  # type: List[Callable[..., optparse.Option]]

# the 'dest' string values
# (computed once at import time by instantiating each per-requirement option;
# process_line() uses them as keys into the parsed options' __dict__)
SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]
72
73
def parse_requirements(
    filename,  # type: str
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    constraint=False,  # type: bool
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None  # type: Optional[bool]
):
    # type: (...) -> Iterator[InstallRequirement]
    """Lazily yield the InstallRequirement objects described by a file.

    This is a generator: nothing is fetched or validated (including the
    ``session`` check) until the first item is requested.

    :param filename:    Path or url of requirements file.
    :param finder:      Instance of pip.index.PackageFinder.
    :param comes_from:  Origin description of requirements.
    :param options:     cli options.
    :param session:     Instance of pip.download.PipSession. Required even
        though it is declared with a ``None`` default.
    :param constraint:  If true, parsing a constraint file rather than
        requirements file.
    :param wheel_cache: Instance of pip.wheel.WheelCache
    :param use_pep517:  Value of the --use-pep517 option.
    :raises TypeError:  If ``session`` is None.
    """
    if session is None:
        raise TypeError(
            "parse_requirements() missing 1 required keyword argument: "
            "'session'"
        )

    # Only the text of the file is needed; the first element returned by
    # get_file_content() is discarded.
    _unused, file_text = get_file_content(
        filename, comes_from=comes_from, session=session
    )

    for line_number, line in preprocess(file_text, options):
        # A single logical line may expand to several requirements (for
        # example when it pulls in a nested requirements file).
        for req in process_line(
            line, filename, line_number, finder, comes_from, options,
            session, wheel_cache,
            use_pep517=use_pep517, constraint=constraint
        ):
            yield req
115
116
def preprocess(content, options):
    # type: (Text, Optional[optparse.Values]) -> ReqFileLines
    """Turn raw file content into an iterator of (line number, line) pairs.

    The content is split into lines numbered from 1 and then run through
    the following stages, in this order: continuation lines are joined,
    comments and empty lines are dropped, lines matching the skip regex
    from ``options`` are filtered out, and environment variables are
    expanded.

    :param content: the content of the requirements file
    :param options: cli options
    """
    numbered = enumerate(content.splitlines(), start=1)  # type: ReqFileLines
    # Read inside-out: join -> strip comments -> skip regex -> expand env.
    return expand_env_variables(
        skip_regex(ignore_comments(join_lines(numbered)), options)
    )
130
131
def process_line(
    line,  # type: Text
    filename,  # type: str
    line_number,  # type: int
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None,  # type: Optional[bool]
    constraint=False,  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    Exactly one of the following is acted upon, checked in this order: a
    requirement, an editable, a nested requirements/constraints file,
    ``--require-hashes``, finder-related options.

    :param line: A single logical line of the requirements file.
    :param filename: Path or url of the file the line comes from; used in
        messages and to resolve relative nested files and find-links.
    :param line_number: Line number of ``line`` within ``filename``.
    :param finder: PackageFinder updated by option-only lines; when falsy
        those lines have no effect.
    :param comes_from: Passed through to nested parse_requirements() calls.
    :param options: OptionParser options that we may update
    :param session: Passed through to nested parse_requirements() calls.
    :param wheel_cache: Passed to the created requirements and to nested
        parse_requirements() calls.
    :param use_pep517: Value of the --use-pep517 option; applied to the
        created requirements and to nested files.
    :param constraint: If True, parsing a constraints file.
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    # Prior to 2.7.3, shlex cannot deal with unicode entries
    if sys.version_info < (2, 7, 3):
        # https://github.com/python/mypy/issues/1174
        options_str = options_str.encode('utf8')  # type: ignore
    # https://github.com/python/mypy/issues/1174
    opts, _ = parser.parse_args(
        shlex.split(options_str), defaults)  # type: ignore

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        line_source = 'line {} of {}'.format(line_number, filename)
        yield install_req_from_line(
            args_str,
            comes_from=line_comes_from,
            use_pep517=use_pep517,
            isolated=isolated,
            options=req_options,
            wheel_cache=wheel_cache,
            constraint=constraint,
            line_source=line_source,
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        # Only the first -e value on the line is used.
        yield install_req_from_editable(
            opts.editables[0], comes_from=line_comes_from,
            use_pep517=use_pep517,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        # -r takes precedence over -c when both appear on the same line.
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        # use_pep517 is forwarded so that requirements coming from nested
        # files honour the same setting as those of the outer file.
        parsed_reqs = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache,
            use_pep517=use_pep517
        )
        for req in parsed_reqs:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        # `options` is optional everywhere else in this function; without
        # it there is nothing to percolate the flag into.
        if options:
            options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        find_links = finder.find_links
        index_urls = finder.index_urls
        if opts.index_url:
            index_urls = [opts.index_url]
        if opts.no_index is True:
            index_urls = []
        if opts.extra_index_urls:
            index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            # Prefer the location relative to the requirements file when
            # such a path actually exists; otherwise keep the value as is.
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            find_links.append(value)

        search_scope = SearchScope(
            find_links=find_links,
            index_urls=index_urls,
        )
        finder.search_scope = search_scope

        if opts.pre:
            finder.set_allow_all_prereleases()
        # The description of the origin is the same for every host.
        source = 'line {} of {}'.format(line_number, filename)
        for host in opts.trusted_hosts or []:
            finder.add_trusted_host(host, source=source)
272
273
def break_args_options(line):
    # type: (Text) -> Tuple[str, Text]
    """Break up the line into an args and options string.  We only want to shlex
    (and then optparse) the options, not the args.  args can contain markers
    which are corrupted by shlex.

    The line is split on single spaces; every token before the first one
    that starts with '-' belongs to the args, that token and everything
    after it belong to the options.

    :param line: A single requirements line.
    :return: An ``(args, options)`` pair of space-joined strings; either
        part may be empty.
    """
    tokens = line.split(' ')
    # Position of the first option-looking token. Defaults to "past the
    # end" so that a line without options is all args.
    first_option = len(tokens)
    for position, token in enumerate(tokens):
        # startswith('-') also covers long options ('--...').
        if token.startswith('-'):
            first_option = position
            break
    args = tokens[:first_option]
    options = tokens[first_option:]
    return ' '.join(args), ' '.join(options)  # type: ignore
290
291
def build_parser(line):
    # type: (Text) -> optparse.OptionParser
    """
    Create the option parser used for one requirements line.

    The parser knows every option from SUPPORTED_OPTIONS and
    SUPPORTED_OPTIONS_REQ, has no help option, and raises
    RequirementsFileParseError (quoting ``line``) where optparse would
    otherwise exit.

    :param line: The requirements line, used only in the error message.
    """
    # By default optparse sys.exits on parsing errors. We want to wrap
    # that in our own exception.
    # The function is stored on the instance (see below), so Python does not
    # bind it: `self` just receives the first positional argument of the
    # call and is never used.
    def parser_exit(self, msg):
        # type: (Any, str) -> NoReturn
        # add offending line
        raise RequirementsFileParseError(
            'Invalid requirement: %s\n%s' % (line, msg)
        )

    parser = optparse.OptionParser(add_help_option=False)
    for make_option in SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ:
        parser.add_option(make_option())

    # NOTE: mypy disallows assigning to a method
    #       https://github.com/python/mypy/issues/2427
    parser.exit = parser_exit  # type: ignore
    return parser
316
317
def join_lines(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Join lines ending in a backslash with the line(s) that follow them.

    A comment line is never treated as a continuation, even if it ends in
    a backslash; it terminates the logical line being built.  The joined
    line takes on the index of the first line.

    :param lines_enum: Iterator of ``(line number, line)`` pairs.
    :return: Iterator of ``(line number, logical line)`` pairs.
    """
    primary_line_number = None
    new_line = []  # type: List[Text]
    for line_number, line in lines_enum:
        # Evaluated once per line; a match means the line is a comment.
        is_comment = COMMENT_RE.match(line)
        # TODO: handle space after a trailing backslash.
        if is_comment or not line.endswith('\\'):
            if is_comment:
                # this ensures comments are always matched later
                line = ' ' + line
            if new_line:
                # This line completes a pending continuation.
                new_line.append(line)
                yield primary_line_number, ''.join(new_line)
                new_line = []
            else:
                yield line_number, line
        else:
            if not new_line:
                primary_line_number = line_number
            # Drop only the trailing continuation marker; backslashes at
            # the start of the line (e.g. a UNC path) are content.
            new_line.append(line.rstrip('\\'))

    # last line contains a trailing backslash
    if new_line:
        yield primary_line_number, ''.join(new_line)
346
347
def ignore_comments(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """
    Remove comments from every line and drop lines that end up empty.

    Surrounding whitespace is stripped from the lines that are kept; line
    numbers are passed through unchanged.
    """
    for line_number, raw_line in lines_enum:
        cleaned = COMMENT_RE.sub('', raw_line).strip()
        if not cleaned:
            # Blank or comment-only line.
            continue
        yield line_number, cleaned
358
359
def skip_regex(lines_enum, options):
    # type: (ReqFileLines, Optional[optparse.Values]) -> ReqFileLines
    """
    Drop the lines that match the '--skip-requirements-regex' pattern.

    When ``options`` is falsy or carries no pattern, ``lines_enum`` is
    returned untouched.

    Note: the regex pattern is only built once
    """
    if not options:
        return lines_enum

    pattern_source = options.skip_requirements_regex
    if not pattern_source:
        return lines_enum

    pattern = re.compile(pattern_source)

    def is_skipped(entry):
        # entry is a (line number, line) pair; only the text is searched.
        return pattern.search(entry[1])

    return filterfalse(is_skipped, lines_enum)
372
373
def expand_env_variables(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Substitute `${NAME}` placeholders with values from `os.getenv`.

    Placeholders whose variable is unset or empty are left in the line
    as they are.

    The only allowed format for environment variables defined in the
    requirement file is `${MY_VARIABLE_1}` to ensure two things:

    1. Strings that contain a `$` aren't accidentally (partially) expanded.
    2. Ensure consistency across platforms for requirement files.

    These points are the result of a discussion on the `github pull
    request #3514 <https://github.com/pypa/pip/pull/3514>`_.

    Valid characters in variable names follow the `POSIX standard
    <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
    to uppercase letter, digits and the `_` (underscore).
    """
    for line_number, line in lines_enum:
        expanded = line
        # The placeholders are collected from the original line; the
        # replacements are applied to the progressively expanded copy.
        for placeholder, name in ENV_VAR_RE.findall(line):
            replacement = os.getenv(name)
            if replacement:
                expanded = expanded.replace(placeholder, replacement)

        yield line_number, expanded
400