1#!/usr/bin/env python3.8
2
3import argparse
4import os
5import glob
6import tarfile
7import zipfile
8import shutil
9import pathlib
10import sys
11
12from typing import Generator, Any
13
14sys.path.insert(0, ".")
15
16from pegen import build
17from scripts import test_parse_directory
18
# Absolute path of the directory that contains this script.
HERE = pathlib.Path(__file__).resolve().parent

# Command-line interface: a single repeatable ``-t/--tree`` flag whose
# occurrence count is stored in ``args.tree`` (0 when the flag is absent).
argparser = argparse.ArgumentParser(
    prog="test_pypi_packages",
    description="Helper program to test parsing PyPI packages",
)
argparser.add_argument(
    "-t",
    "--tree",
    action="count",
    default=0,
    help="Compare parse tree to official AST",
)
27
28
def get_packages() -> Generator[str, None, None]:
    """Yield the path of each package archive found in ``./data/pypi``.

    Archives are matched by file name: ``*.tar.gz`` first, then ``*.zip``,
    then ``*.tgz``. The complete list is collected before anything is
    yielded, so files that appear while the caller is iterating are not
    picked up.
    """
    patterns = (
        "./data/pypi/*.tar.gz",
        "./data/pypi/*.zip",
        "./data/pypi/*.tgz",
    )
    archives = []
    for pattern in patterns:
        archives.extend(glob.glob(pattern))
    yield from archives
37
38
def extract_files(filename: str) -> None:
    """Unpack the archive *filename* into the ``data/pypi`` directory.

    The archive type is detected from the file contents: anything that
    ``tarfile`` recognises is treated as a tar archive, otherwise anything
    that ``zipfile`` recognises is treated as a zip archive.

    Raises:
        ValueError: if *filename* is neither a tar nor a zip archive.
    """
    savedir = os.path.join("data", "pypi")
    # The archives are opened as context managers so their file handles are
    # closed as soon as extraction finishes (or fails), instead of lingering
    # until garbage collection.
    # NOTE(review): extractall() trusts the member paths stored in the
    # archive; only run this on archives from a source you trust.
    if tarfile.is_tarfile(filename):
        with tarfile.open(filename) as tar_archive:
            tar_archive.extractall(savedir)
    elif zipfile.is_zipfile(filename):
        with zipfile.ZipFile(filename) as zip_archive:
            zip_archive.extractall(savedir)
    else:
        raise ValueError(f"Could not identify type of compressed file {filename}")
48
def find_dirname(package_name: str) -> str:
    """Return the directory under ``data/pypi`` that belongs to *package_name*.

    A directory is a candidate when its name occurs inside the archive's
    file name (e.g. ``foo-1.0`` for ``./data/pypi/foo-1.0.tar.gz``). When
    several directories qualify, the longest name wins, so ``foo-1.0.1`` is
    preferred over a leftover ``foo-1.0`` for ``foo-1.0.1.tar.gz``.

    Raises:
        AssertionError: if no directory matches the archive.
    """
    base_dir = os.path.join("data", "pypi")
    # Match against the archive's file name only; matching the whole path
    # would also accept directories named after path components such as
    # "pypi" or "data".
    archive_name = os.path.basename(package_name)
    candidates = [
        name
        for name in os.listdir(base_dir)
        if name in archive_name and os.path.isdir(os.path.join(base_dir, name))
    ]
    if not candidates:
        # Raised explicitly (rather than ``assert False``) so the failure is
        # still reported when Python runs with -O.
        raise AssertionError(f"No extracted directory found for {package_name}")
    # Sorting first makes the choice independent of os.listdir() ordering
    # when two candidates have the same length.
    best_match = max(sorted(candidates), key=len)
    return os.path.join(base_dir, best_match)
55
56
def run_tests(dirname: str, tree: int) -> int:
    """Run ``test_parse_directory.parse_directory`` on *dirname* using pegen.

    *tree* is forwarded as ``tree_arg``; any non-zero value also switches
    ``mode`` from 0 to 1. The status returned by ``parse_directory`` is
    passed back unchanged.
    """
    if tree:
        mode = 1
    else:
        mode = 0
    status = test_parse_directory.parse_directory(
        dirname,
        parser="pegen",
        mode=mode,
        tree_arg=tree,
        short=True,
        verbose=False,
        excluded_files=[],
    )
    return status
67
68
def main() -> None:
    """Extract every archive in ``data/pypi`` and try to parse its contents.

    For each archive yielded by ``get_packages()``: unpack it, locate the
    extracted directory, and run the parser tests on it. A directory whose
    tests return status 0 is deleted again; otherwise it is left in place
    and a failure message is printed. Archives of an unrecognised type are
    reported and skipped.
    """
    args = argparser.parse_args()
    tree = args.tree

    for package in get_packages():
        # The message has no trailing newline, so flush explicitly; otherwise
        # it may only show up after the (possibly slow) extraction is over.
        print(f"Extracting files from {package}... ", end="", flush=True)
        try:
            extract_files(package)
        except ValueError as e:
            print(e)
            continue
        else:
            print("Done")

        print("Trying to parse all python files ... ")
        dirname = find_dirname(package)
        status = run_tests(dirname, tree)
        if status == 0:
            # Parsed cleanly: remove the extracted tree again.
            shutil.rmtree(dirname)
        else:
            print(f"Failed to parse {dirname}")
90
91if __name__ == "__main__":
92    main()
93