import abc
import os
import re
from typing import Set, Dict, Union

assert Set  # only referenced from type comments; keeps pyflakes quiet


class AbstractParser(abc.ABC):
    """ Common ancestor of every parser.
    Instantiation might yield `ValueError` on invalid files,
    and `remove_all` might yield `RuntimeError` when the cleaning fails.
    """
    meta_list = set()  # type: Set[str]
    mimetypes = set()  # type: Set[str]

    def __init__(self, filename: str) -> None:
        """
        Store the (possibly `./`-prefixed) input path and compute the
        path of the cleaned output file.

        :param filename: Path of the file to process
        :raises ValueError: Documented contract for invalid files; this
            base implementation performs no validation of its own.
        """
        # Some parsers hand the path over to external binaries. Anchoring
        # any name that does not start with a lowercase letter, a digit,
        # a dot or a slash under `.` prevents shell command injections.
        starts_safely = re.search('^[a-z0-9./]', filename) is not None
        if not starts_safely:
            filename = os.path.join('.', filename)
        self.filename = filename

        stem, suffix = os.path.splitext(filename)

        # Double extensions such as tar.gz or tar.bz2 are kept together,
        # unless `.tar` is the whole stem.
        if len(stem) > 4 and stem.endswith('.tar'):
            stem = stem[:-4]
            suffix = '.tar' + suffix

        self.output_filename = ''.join((stem, '.cleaned', suffix))

        # Default processing flags.
        self.lightweight_cleaning = False
        self.sandbox = True

    @abc.abstractmethod
    def get_meta(self) -> Dict[str, Union[str, dict]]:
        """Return every piece of metadata found in the current file"""

    @abc.abstractmethod
    def remove_all(self) -> bool:
        """
        Strip every piece of metadata from the current file

        :raises RuntimeError: Raised if the cleaning process went wrong.
        """