1#!/usr/bin/python3 -OO
2# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
3#
4# This program is free software; you can redistribute it and/or
5# modify it under the terms of the GNU General Public License
6# as published by the Free Software Foundation; either version 2
7# of the License, or (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program; if not, write to the Free Software
16# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17
18"""
19
20Deobfuscation post-processing script:
21
Checks whether the completed job folder contains par2 files,
for example "rename.par2", and uses those to rename the files.
If no par2 file leads to a rename, it renames large, not-excluded
files to the job name from the queue if the filename looks obfuscated.
26
27Based on work by P1nGu1n
28
29"""
30
31import hashlib
32import logging
33import os
34import re
35
36from sabnzbd.filesystem import get_unique_filename, renamer, get_ext
37from sabnzbd.par2file import is_parfile, parse_par2_file
38
39# Files to exclude and minimal file size for renaming
40EXCLUDED_FILE_EXTS = (".vob", ".rar", ".par2", ".mts", ".m2ts", ".cpi", ".clpi", ".mpl", ".mpls", ".bdm", ".bdmv")
41MIN_FILE_SIZE = 10 * 1024 * 1024
42
43
def decode_par2(parfile):
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the folder that holds ``parfile`` is matched against
    the par2 data by the MD5 digest of its first 16384 bytes. A matching file
    whose current name differs from the name stored for that digest is renamed
    to that name (made unique via get_unique_filename).

    Returns True if at least one file was renamed, False otherwise; False is
    also returned when ``parfile`` is not accepted by is_parfile().
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; parse_par2_file is expected to fill the mapping
    # with {md5-of-first-16k digest: real filename} entries
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Parse all files in the folder
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue

        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k not in md5of16k:
            continue

        real_name = md5of16k[file_md5of16k]
        if real_name == fn:
            # Already carries its real name: renaming it would only push it to
            # a needlessly "unique" variant of the name it already has
            continue

        # Make sure it's a unique name
        new_path = os.path.join(dirname, real_name)
        renamer(filepath, get_unique_filename(new_path))
        result = True
    return result
73
74
def is_probably_obfuscated(myinputfilename):
    """Return True if the filename is likely obfuscated, False if it looks like a clear name.

    myinputfilename (string) can be a plain file name, or a full path; only the
    base name without its last extension is analysed.
    Default: True, i.e. when no rule recognises a clear name (which includes an
    empty base name) the name is considered obfuscated.
    """

    # Find filebasename: strip the directory part and the last extension
    filename = os.path.basename(myinputfilename)
    filebasename, _ = os.path.splitext(filename)

    # First fixed patterns that we know of:
    logging.debug("Checking: %s", filebasename)

    # ...blabla.H.264/b082fa0beaa644d3aa01045d5b8d0b36.mkv is certainly obfuscated
    if re.match(r"^[a-f0-9]{32}$", filebasename):
        logging.debug("Obfuscated: 32 hex digit")
        # exactly 32 hex digits, so:
        return True

    # 0675e29e9abfd2.f7d069dab0b853283cc1b069a25f82.6547
    if re.match(r"^[a-f0-9\.]{40,}$", filebasename):
        logging.debug("Obfuscated: starting with 40+ lower case hex digits and/or dots")
        return True

    # /some/thing/abc.xyz.a4c567edbcbf27.BLA is certainly obfuscated
    if re.match(r"^abc\.xyz", filebasename):
        logging.debug("Obfuscated: starts with 'abc.xyz'")
        # ... which we consider as obfuscated:
        return True

    # these are signals for the obfuscation versus non-obfuscation
    decimals = sum(1 for c in filebasename if c.isnumeric())
    upperchars = sum(1 for c in filebasename if c.isupper())
    lowerchars = sum(1 for c in filebasename if c.islower())
    spacesdots = sum(1 for c in filebasename if c in " ._")  # space-like symbols

    # Example: "Great Distro"
    if upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1:
        logging.debug("Not obfuscated: upperchars >= 2 and lowerchars >= 2  and spacesdots >= 1")
        return False

    # Example: "this is a download"
    if spacesdots >= 3:
        logging.debug("Not obfuscated: spacesdots >= 3")
        return False

    # Example: "Beast 2020"
    if (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1:
        logging.debug("Not obfuscated: (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1")
        return False

    # Example: "Catullus", starts with a capital, and most letters are lower case.
    # The leading truthiness test protects the [0] lookup against an empty base name
    # (empty input, or a path ending in a separator); "lowerchars > 2" protects the division.
    if filebasename and filebasename[0].isupper() and lowerchars > 2 and upperchars / lowerchars <= 0.25:
        logging.debug("Not obfuscated: starts with a capital, and most letters are lower case")
        return False

    # If we get here, no trigger for a clear name was found, so let's default to obfuscated
    logging.debug("Obfuscated (default)")
    return True  # default: obfuscated
132
133
def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname.

    filelist: paths to consider; entries that no longer exist are ignored.
    usefulname: the name (without extension) that qualifying files are renamed to.

    First every entry ending in ".par2" is handed to decode_par2(). Only if none
    of them renamed anything, the name-based renamer runs: files bigger than
    MIN_FILE_SIZE, with an extension that is not excluded and a name that
    is_probably_obfuscated(), are renamed to <folder>/<usefulname><extension>,
    together with files in filelist sharing the same base name (e.g. subtitles).
    Returns nothing.
    """

    # to be sure, only keep really existing files:
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename")

    # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # too small to care
                continue
            ext = get_ext(candidate)
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # collection, and extension not yet in excluded_file_exts, so add it
                # (a new tuple is built, so the module-level constant stays untouched)
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)  # this as last test to avoid unnecessary analysis
            ):
                # Rename and make sure the new filename is unique
                path = os.path.dirname(filename)
                # construct new_name: <path><usefulname><extension>
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)
                # find other files with the same basename in filelist, and rename them in the same way:
                basedirfile, _ = os.path.splitext(filename)  # something like "/home/this/myiso"
                for otherfile in filelist:
                    if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile):
                        # yes, same basedirfile, only different extension
                        # Cut off only the leading basedirfile: str.replace() would also remove
                        # later occurrences of it inside the remaining part of the name
                        remainingextension = otherfile[len(basedirfile) :]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension))
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
            else:
                logging.debug("%s excluded from deobfuscation based on size, extension or non-obfuscation", filename)
    else:
        # Reached when a par2 file already took care of renaming, so the name-based renamer is skipped
        logging.info("No qualifying files found to deobfuscate")
215