1import abc
2import os
3import re
4from typing import Set, Dict, Union
5
6assert Set  # make pyflakes happy
7
8
class AbstractParser(abc.ABC):
    """ This is the base class of every parser.

    Concrete parsers must implement `get_meta` and `remove_all`.
    Instantiation might raise `ValueError` on invalid files,
    and `remove_all` might raise `RuntimeError` when something went wrong.
    """
    # Both sets are empty here; presumably overridden by concrete parsers.
    meta_list = set()  # type: Set[str]
    mimetypes = set()  # type: Set[str]

    def __init__(self, filename: str) -> None:
        """
        Store the (sanitised) input path and derive the output path.

        Sets the following attributes:

        - `filename`: the input path, prefixed with `./` when it does not
          start with a lowercase letter, a digit, a dot or a slash;
        - `output_filename`: the input path with `.cleaned` inserted before
          the extension (`foo.png` -> `foo.cleaned.png`,
          `foo.tar.gz` -> `foo.cleaned.tar.gz`);
        - `lightweight_cleaning`: initialised to `False`;
        - `sandbox`: initialised to `True`.

        :param filename: Path of the file to process
        :raises ValueError: Raised upon an invalid file (nothing in this
            base constructor raises it; it is part of the contract for
            subclasses)
        """
        if re.search('^[a-z0-9./]', filename) is None:
            # Some parsers are calling external binaries,
            # this prevents shell command injections
            filename = os.path.join('.', filename)

        self.filename = filename
        fname, extension = os.path.splitext(filename)

        # Special case for tar.gz, tar.bz2, … files: keep `.tar` as part of
        # the extension. The guard looks at the basename only, so that a
        # file literally named `.tar.gz` is handled the same way whether it
        # is given as `.tar.gz`, `./.tar.gz` or `dir/.tar.gz` (its stem
        # would otherwise become empty).
        if fname.endswith('.tar') and os.path.basename(fname) != '.tar':
            fname, extension = fname[:-4], '.tar' + extension

        self.output_filename = fname + '.cleaned' + extension
        self.lightweight_cleaning = False
        self.sandbox = True

    @abc.abstractmethod
    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """Return all the metadata of the current file"""

    @abc.abstractmethod
    def remove_all(self) -> bool:
        """
        Remove all the metadata of the current file

        :raises RuntimeError: Raised if the cleaning process went wrong.
        """
48