"""
Requirements file parsing
"""

from __future__ import absolute_import

import optparse
import os
import re
import shlex
import sys

from pip._vendor.six.moves import filterfalse
from pip._vendor.six.moves.urllib import parse as urllib_parse

from pip._internal.cli import cmdoptions
from pip._internal.download import get_file_content
from pip._internal.exceptions import RequirementsFileParseError
from pip._internal.models.search_scope import SearchScope
from pip._internal.req.constructors import (
    install_req_from_editable, install_req_from_line,
)
from pip._internal.utils.typing import MYPY_CHECK_RUNNING

if MYPY_CHECK_RUNNING:
    from typing import (
        Any, Callable, Iterator, List, NoReturn, Optional, Text, Tuple,
    )
    from pip._internal.req import InstallRequirement
    from pip._internal.cache import WheelCache
    from pip._internal.index import PackageFinder
    from pip._internal.download import PipSession

    # (line number, line text) pairs flowing through the preprocessing
    # pipeline.
    ReqFileLines = Iterator[Tuple[int, Text]]

__all__ = ['parse_requirements']

# Matches a leading 'http:', 'https:' or 'file:' scheme, case-insensitively.
SCHEME_RE = re.compile(r'^(http|https|file):', re.I)
# Matches a '#' comment that either starts the line or is preceded by
# whitespace, through to the end of the line.
COMMENT_RE = re.compile(r'(^|\s+)#.*$')

# Matches environment variable-style values in '${MY_VARIABLE_1}' with the
# variable name consisting of only uppercase letters, digits or the '_'
# (underscore). This follows the POSIX standard defined in IEEE Std 1003.1,
# 2013 Edition.
ENV_VAR_RE = re.compile(r'(?P<var>\$\{(?P<name>[A-Z0-9_]+)\})')

# Option factories accepted on lines of a requirements file.
SUPPORTED_OPTIONS = [
    cmdoptions.constraints,
    cmdoptions.editable,
    cmdoptions.requirements,
    cmdoptions.no_index,
    cmdoptions.index_url,
    cmdoptions.find_links,
    cmdoptions.extra_index_url,
    cmdoptions.always_unzip,
    cmdoptions.no_binary,
    cmdoptions.only_binary,
    cmdoptions.pre,
    cmdoptions.trusted_host,
    cmdoptions.require_hashes,
]  # type: List[Callable[..., optparse.Option]]

# options to be passed to requirements
SUPPORTED_OPTIONS_REQ = [
    cmdoptions.install_options,
    cmdoptions.global_options,
    cmdoptions.hash,
]  # type: List[Callable[..., optparse.Option]]

# the 'dest' string values
SUPPORTED_OPTIONS_REQ_DEST = [str(o().dest) for o in SUPPORTED_OPTIONS_REQ]


def parse_requirements(
    filename,  # type: str
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    constraint=False,  # type: bool
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None  # type: Optional[bool]
):
    # type: (...) -> Iterator[InstallRequirement]
    """Parse a requirements file and yield InstallRequirement instances.

    :param filename:    Path or url of requirements file.
    :param finder:      Instance of pip.index.PackageFinder.
    :param comes_from:  Origin description of requirements.
    :param options:     cli options.
    :param session:     Instance of pip.download.PipSession.
    :param constraint:  If true, parsing a constraint file rather than
        requirements file.
    :param wheel_cache: Instance of pip.wheel.WheelCache
    :param use_pep517:  Value of the --use-pep517 option.
    :raises TypeError:  If ``session`` is not supplied.
    """
    # 'session' has a default only so it can be passed by keyword; it is
    # effectively mandatory.
    if session is None:
        raise TypeError(
            "parse_requirements() missing 1 required keyword argument: "
            "'session'"
        )

    _, content = get_file_content(
        filename, comes_from=comes_from, session=session
    )

    lines_enum = preprocess(content, options)

    for line_number, line in lines_enum:
        req_iter = process_line(line, filename, line_number, finder,
                                comes_from, options, session, wheel_cache,
                                use_pep517=use_pep517, constraint=constraint)
        for req in req_iter:
            yield req


def preprocess(content, options):
    # type: (Text, Optional[optparse.Values]) -> ReqFileLines
    """Split, filter, and join lines, and return a line iterator

    :param content: the content of the requirements file
    :param options: cli options
    """
    # Line numbers are 1-based; each later stage is a lazy filter over
    # (line_number, line) pairs.
    lines_enum = enumerate(content.splitlines(), start=1)  # type: ReqFileLines
    lines_enum = join_lines(lines_enum)
    lines_enum = ignore_comments(lines_enum)
    lines_enum = skip_regex(lines_enum, options)
    lines_enum = expand_env_variables(lines_enum)
    return lines_enum


def process_line(
    line,  # type: Text
    filename,  # type: str
    line_number,  # type: int
    finder=None,  # type: Optional[PackageFinder]
    comes_from=None,  # type: Optional[str]
    options=None,  # type: Optional[optparse.Values]
    session=None,  # type: Optional[PipSession]
    wheel_cache=None,  # type: Optional[WheelCache]
    use_pep517=None,  # type: Optional[bool]
    constraint=False,  # type: bool
):
    # type: (...) -> Iterator[InstallRequirement]
    """Process a single requirements line; This can result in creating/yielding
    requirements, or updating the finder.

    For lines that contain requirements, the only options that have an effect
    are from SUPPORTED_OPTIONS_REQ, and they are scoped to the
    requirement. Other options from SUPPORTED_OPTIONS may be present, but are
    ignored.

    For lines that do not contain requirements, the only options that have an
    effect are from SUPPORTED_OPTIONS. Options from SUPPORTED_OPTIONS_REQ may
    be present, but are ignored. These lines may contain multiple options
    (although our docs imply only one is supported), and all are parsed and
    affect the finder.

    :param line: The preprocessed text of the line.
    :param filename: Path or url of the file the line came from.
    :param line_number: Line number of ``line`` within ``filename``.
    :param finder: Finder whose search scope, prerelease setting and trusted
        hosts may be updated by option-only lines.
    :param comes_from: Origin description forwarded to nested files.
    :param options: OptionParser options that we may update
    :param session: Session forwarded when parsing nested files.
    :param wheel_cache: Wheel cache forwarded to created requirements.
    :param use_pep517: Value of the --use-pep517 option; also forwarded to
        nested requirement/constraint files.
    :param constraint: If True, parsing a constraints file.
    """
    parser = build_parser(line)
    defaults = parser.get_default_values()
    defaults.index_url = None
    if finder:
        defaults.format_control = finder.format_control
    args_str, options_str = break_args_options(line)
    # Prior to 2.7.3, shlex cannot deal with unicode entries
    if sys.version_info < (2, 7, 3):
        # https://github.com/python/mypy/issues/1174
        options_str = options_str.encode('utf8')  # type: ignore
    # https://github.com/python/mypy/issues/1174
    opts, _ = parser.parse_args(
        shlex.split(options_str), defaults)  # type: ignore

    # preserve for the nested code path
    line_comes_from = '%s %s (line %s)' % (
        '-c' if constraint else '-r', filename, line_number,
    )

    # yield a line requirement
    if args_str:
        isolated = options.isolated_mode if options else False
        if options:
            cmdoptions.check_install_build_global(options, opts)
        # get the options that apply to requirements
        req_options = {}
        for dest in SUPPORTED_OPTIONS_REQ_DEST:
            if dest in opts.__dict__ and opts.__dict__[dest]:
                req_options[dest] = opts.__dict__[dest]
        line_source = 'line {} of {}'.format(line_number, filename)
        yield install_req_from_line(
            args_str,
            comes_from=line_comes_from,
            use_pep517=use_pep517,
            isolated=isolated,
            options=req_options,
            wheel_cache=wheel_cache,
            constraint=constraint,
            line_source=line_source,
        )

    # yield an editable requirement
    elif opts.editables:
        isolated = options.isolated_mode if options else False
        yield install_req_from_editable(
            opts.editables[0], comes_from=line_comes_from,
            use_pep517=use_pep517,
            constraint=constraint, isolated=isolated, wheel_cache=wheel_cache
        )

    # parse a nested requirements file
    elif opts.requirements or opts.constraints:
        if opts.requirements:
            req_path = opts.requirements[0]
            nested_constraint = False
        else:
            req_path = opts.constraints[0]
            nested_constraint = True
        # original file is over http
        if SCHEME_RE.search(filename):
            # do a url join so relative paths work
            req_path = urllib_parse.urljoin(filename, req_path)
        # original file and nested file are paths
        elif not SCHEME_RE.search(req_path):
            # do a join so relative paths work
            req_path = os.path.join(os.path.dirname(filename), req_path)
        # TODO: Why not use `comes_from='-r {} (line {})'` here as well?
        # Forward use_pep517 so requirements from nested files are built
        # with the same setting as those of the including file.
        parsed_reqs = parse_requirements(
            req_path, finder, comes_from, options, session,
            constraint=nested_constraint, wheel_cache=wheel_cache,
            use_pep517=use_pep517,
        )
        for req in parsed_reqs:
            yield req

    # percolate hash-checking option upward
    elif opts.require_hashes:
        # 'options' is optional; without it there is nothing to update.
        if options:
            options.require_hashes = opts.require_hashes

    # set finder options
    elif finder:
        find_links = finder.find_links
        index_urls = finder.index_urls
        if opts.index_url:
            index_urls = [opts.index_url]
        if opts.no_index is True:
            index_urls = []
        if opts.extra_index_urls:
            index_urls.extend(opts.extra_index_urls)
        if opts.find_links:
            # FIXME: it would be nice to keep track of the source
            # of the find_links: support a find-links local path
            # relative to a requirements file.
            value = opts.find_links[0]
            req_dir = os.path.dirname(os.path.abspath(filename))
            relative_to_reqs_file = os.path.join(req_dir, value)
            if os.path.exists(relative_to_reqs_file):
                value = relative_to_reqs_file
            find_links.append(value)

        search_scope = SearchScope(
            find_links=find_links,
            index_urls=index_urls,
        )
        finder.search_scope = search_scope

        if opts.pre:
            finder.set_allow_all_prereleases()
        for host in opts.trusted_hosts or []:
            source = 'line {} of {}'.format(line_number, filename)
            finder.add_trusted_host(host, source=source)


def break_args_options(line):
    # type: (Text) -> Tuple[str, Text]
    """Break up the line into an args and options string.  We only want to shlex
    (and then optparse) the options, not the args.  args can contain markers
    which are corrupted by shlex.

    :param line: A single requirements line.
    :return: ``(args, options)``, where ``args`` is everything before the
        first space-separated token starting with ``-`` and ``options`` is
        that token and everything after it.
    """
    tokens = line.split(' ')
    args = []
    options = tokens[:]
    for token in tokens:
        # A single check is enough: anything starting with '--' also starts
        # with '-'.
        if token.startswith('-'):
            break
        else:
            args.append(token)
            options.pop(0)
    return ' '.join(args), ' '.join(options)  # type: ignore


def build_parser(line):
    # type: (Text) -> optparse.OptionParser
    """
    Return a parser for parsing requirement lines

    :param line: The line being parsed; only used to build the error message
        when option parsing fails.
    """
    parser = optparse.OptionParser(add_help_option=False)

    option_factories = SUPPORTED_OPTIONS + SUPPORTED_OPTIONS_REQ
    for option_factory in option_factories:
        option = option_factory()
        parser.add_option(option)

    # By default optparse sys.exits on parsing errors. We want to wrap
    # that in our own exception.
    def parser_exit(self, msg):
        # type: (Any, str) -> NoReturn
        # add offending line
        msg = 'Invalid requirement: %s\n%s' % (line, msg)
        raise RequirementsFileParseError(msg)
    # NOTE: mypy disallows assigning to a method
    #       https://github.com/python/mypy/issues/2427
    parser.exit = parser_exit  # type: ignore

    return parser


def join_lines(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Joins a line ending in '\\' with the line that follows it (except when
    the line is a comment). The joined line takes on the index of the first
    line.
    """
    primary_line_number = None
    new_line = []  # type: List[Text]
    for line_number, line in lines_enum:
        is_comment = COMMENT_RE.match(line)
        if not line.endswith('\\') or is_comment:
            if is_comment:
                # this ensures comments are always matched later
                line = ' ' + line
            if new_line:
                # This line terminates a pending continuation.
                new_line.append(line)
                yield primary_line_number, ''.join(new_line)
                new_line = []
            else:
                yield line_number, line
        else:
            if not new_line:
                primary_line_number = line_number
            new_line.append(line.strip('\\'))

    # last line contains \
    if new_line:
        yield primary_line_number, ''.join(new_line)

    # TODO: handle space after '\'.


def ignore_comments(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """
    Strips comments and filter empty lines.
    """
    for line_number, line in lines_enum:
        line = COMMENT_RE.sub('', line)
        line = line.strip()
        if line:
            yield line_number, line


def skip_regex(lines_enum, options):
    # type: (ReqFileLines, Optional[optparse.Values]) -> ReqFileLines
    """
    Skip lines that match '--skip-requirements-regex' pattern

    Note: the regex pattern is only built once
    """
    skip_regex = options.skip_requirements_regex if options else None
    if skip_regex:
        pattern = re.compile(skip_regex)
        lines_enum = filterfalse(lambda e: pattern.search(e[1]), lines_enum)
    return lines_enum


def expand_env_variables(lines_enum):
    # type: (ReqFileLines) -> ReqFileLines
    """Replace all environment variables that can be retrieved via `os.getenv`.

    The only allowed format for environment variables defined in the
    requirement file is `${MY_VARIABLE_1}` to ensure two things:

    1. Strings that contain a `$` aren't accidentally (partially) expanded.
    2. Ensure consistency across platforms for requirement files.

    These points are the result of a discussion on the `github pull
    request #3514 <https://github.com/pypa/pip/pull/3514>`_.

    Valid characters in variable names follow the `POSIX standard
    <http://pubs.opengroup.org/onlinepubs/9699919799/>`_ and are limited
    to uppercase letter, digits and the `_` (underscore).
    """
    for line_number, line in lines_enum:
        for env_var, var_name in ENV_VAR_RE.findall(line):
            value = os.getenv(var_name)
            # Unset or empty variables are left unexpanded in the line.
            if not value:
                continue

            line = line.replace(env_var, value)

        yield line_number, line