#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""

Deobfuscation post-processing script:

Will check in the completed job folder if maybe there are par2 files,
for example "rename.par2", and use those to rename the files.
If there is no "rename.par2" available, it will rename large, not-excluded
files to the job-name in the queue if the filename looks obfuscated

Based on work by P1nGu1n

"""

import hashlib
import logging
import os
import re

from sabnzbd.filesystem import get_unique_filename, renamer, get_ext
from sabnzbd.par2file import is_parfile, parse_par2_file

# Files to exclude and minimal file size for renaming
EXCLUDED_FILE_EXTS = (".vob", ".rar", ".par2", ".mts", ".m2ts", ".cpi", ".clpi", ".mpl", ".mpls", ".bdm", ".bdmv")
MIN_FILE_SIZE = 10 * 1024 * 1024


def decode_par2(parfile: str) -> bool:
    """Parse a par2 file and rename files listed in the par2 to their real name.

    Every regular file in the folder of ``parfile`` is hashed (MD5 of its
    first 16 KiB) and looked up in the table filled by ``parse_par2_file``.
    A file whose hash is known is renamed to the name stored for that hash.

    Returns True when at least one file in the folder matched a hash from the
    par2 file, False when nothing matched or ``parfile`` is not a par2 file.
    """
    # Check if really a par2 file
    if not is_parfile(parfile):
        logging.info("Par2 file %s was not really a par2 file", parfile)
        return False

    # Parse the par2 file; parse_par2_file fills the dict we hand it
    md5of16k = {}
    parse_par2_file(parfile, md5of16k)

    # Parse all files in the folder
    dirname = os.path.dirname(parfile)
    result = False
    for fn in os.listdir(dirname):
        filepath = os.path.join(dirname, fn)
        # Only check files
        if not os.path.isfile(filepath):
            continue

        with open(filepath, "rb") as file_to_match:
            first16k_data = file_to_match.read(16384)

        # Check if we have this hash
        file_md5of16k = hashlib.md5(first16k_data).digest()
        if file_md5of16k in md5of16k:
            result = True
            new_path = os.path.join(dirname, md5of16k[file_md5of16k])
            # A file that already carries its real name must stay untouched;
            # asking for a "unique" variant of its own path would presumably
            # hand back a numbered name and needlessly rename a correct file.
            if new_path != filepath:
                # Make sure it's a unique name
                renamer(filepath, get_unique_filename(new_path))
    return result


def is_probably_obfuscated(myinputfilename: str) -> bool:
    """Returns boolean if filename is likely obfuscated. Default: True

    myinputfilename (string) can be a plain file name, or a full path.
    Only the base name (without directory and without the last extension) is
    inspected. First a few known obfuscation patterns are tested, then a set
    of heuristics that recognise "human looking" names. If none of them
    triggers, the name is considered obfuscated.
    """

    # Find filebasename
    _, filename = os.path.split(myinputfilename)
    filebasename, _ = os.path.splitext(filename)

    # First fixed patterns that we know of:
    logging.debug("Checking: %s", filebasename)

    # ...blabla.H.264/b082fa0beaa644d3aa01045d5b8d0b36.mkv is certainly obfuscated
    if re.match(r"^[a-f0-9]{32}$", filebasename):
        logging.debug("Obfuscated: 32 hex digit")
        # exactly 32 hex digits, so:
        return True

    # 0675e29e9abfd2.f7d069dab0b853283cc1b069a25f82.6547
    if re.match(r"^[a-f0-9\.]{40,}$", filebasename):
        logging.debug("Obfuscated: starting with 40+ lower case hex digits and/or dots")
        return True

    # /some/thing/abc.xyz.a4c567edbcbf27.BLA is certainly obfuscated
    if re.match(r"^abc\.xyz", filebasename):
        logging.debug("Obfuscated: starts with 'abc.xyz'")
        # ... which we consider as obfuscated:
        return True

    # these are signals for the obfuscation versus non-obfuscation
    decimals = sum(1 for c in filebasename if c.isnumeric())
    upperchars = sum(1 for c in filebasename if c.isupper())
    lowerchars = sum(1 for c in filebasename if c.islower())
    spacesdots = sum(1 for c in filebasename if c in " ._")  # space-like symbols

    # Example: "Great Distro"
    if upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1:
        logging.debug("Not obfuscated: upperchars >= 2 and lowerchars >= 2 and spacesdots >= 1")
        return False

    # Example: "this is a download"
    if spacesdots >= 3:
        logging.debug("Not obfuscated: spacesdots >= 3")
        return False

    # Example: "Beast 2020"
    if (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1:
        logging.debug("Not obfuscated: (upperchars + lowerchars >= 4) and decimals >= 4 and spacesdots >= 1")
        return False

    # Example: "Catullus", starts with a capital, and most letters are lower case
    # The slice (instead of [0]) keeps an empty base name from raising IndexError;
    # lowerchars > 2 is tested before the division, so it can never divide by zero.
    if filebasename[:1].isupper() and lowerchars > 2 and upperchars / lowerchars <= 0.25:
        logging.debug("Not obfuscated: starts with a capital, and most letters are lower case")
        return False

    # If we get here, no trigger for a clear name was found, so let's default to obfuscated
    logging.debug("Obfuscated (default)")
    return True  # default: obfuscated


def deobfuscate_list(filelist, usefulname):
    """Check all files in filelist, and if wanted, deobfuscate: rename to filename based on usefulname

    filelist: paths of the files to inspect; paths that do not exist are ignored.
    usefulname: base name (without extension) that obfuscated files are renamed to.

    When par2 files are present and one of them leads to a match in
    decode_par2(), nothing else is done. Otherwise every file that is bigger
    than MIN_FILE_SIZE, has a non-excluded extension and looks obfuscated is
    renamed to <usefulname><extension>, together with the files in filelist
    that share its base name (for example subtitles).
    """

    # to be sure, only keep really existing files:
    filelist = [f for f in filelist if os.path.exists(f)]

    # Search for par2 files in the filelist
    par2_files = [f for f in filelist if f.endswith(".par2")]
    # Found any par2 files we can use?
    run_renamer = True
    if not par2_files:
        logging.debug("No par2 files found to process, running renamer")
    else:
        # Run par2 from SABnzbd on them
        for par2_file in par2_files:
            # Analyse data and analyse result
            logging.debug("Deobfuscate par2: handling %s", par2_file)
            if decode_par2(par2_file):
                logging.debug("Deobfuscate par2 repair/verify finished")
                run_renamer = False
            else:
                logging.debug("Deobfuscate par2 repair/verify did not find anything to rename")

    # No par2 files? Then we try to rename qualifying (big, not-excluded, obfuscated) files to the job-name
    if run_renamer:
        excluded_file_exts = EXCLUDED_FILE_EXTS
        # If there is a collection with bigger files with the same extension, we don't want to rename it
        extcounter = {}
        for candidate in filelist:
            if os.path.getsize(candidate) < MIN_FILE_SIZE:
                # too small to care
                continue
            ext = get_ext(candidate)
            extcounter[ext] = extcounter.get(ext, 0) + 1
            if extcounter[ext] >= 3 and ext not in excluded_file_exts:
                # collection, and extension not yet in excluded_file_exts, so add it
                excluded_file_exts = (*excluded_file_exts, ext)
                logging.debug(
                    "Found a collection of at least %s files with extension %s, so not renaming those files",
                    extcounter[ext],
                    ext,
                )

        logging.debug("Trying to see if there are qualifying files to be deobfuscated")
        # We start with the biggest file ... probably the most important file
        filelist = sorted(filelist, key=os.path.getsize, reverse=True)
        for filename in filelist:
            # check that file is still there (and not renamed by the secondary renaming process below)
            if not os.path.isfile(filename):
                continue
            logging.debug("Deobfuscate inspecting %s", filename)
            # Do we need to rename this file?
            # Criteria: big, not-excluded extension, obfuscated (in that order)
            if (
                os.path.getsize(filename) > MIN_FILE_SIZE
                and get_ext(filename) not in excluded_file_exts
                and is_probably_obfuscated(filename)  # this as last test to avoid unnecessary analysis
            ):
                # Rename and make sure the new filename is unique
                path = os.path.dirname(filename)
                # construct new_name: <path><usefulname><extension>
                new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), get_ext(filename)))
                logging.info("Deobfuscate renaming %s to %s", filename, new_name)
                renamer(filename, new_name)
                # find other files with the same basename in filelist, and rename them in the same way:
                basedirfile, _ = os.path.splitext(filename)  # something like "/home/this/myiso"
                for otherfile in filelist:
                    if otherfile.startswith(basedirfile + ".") and os.path.isfile(otherfile):
                        # yes, same basedirfile, only different extension
                        # Slice off the prefix only: str.replace() would also remove
                        # any later occurrence of basedirfile inside the path.
                        remainingextension = otherfile[len(basedirfile) :]  # might be long ext, like ".dut.srt"
                        new_name = get_unique_filename("%s%s" % (os.path.join(path, usefulname), remainingextension))
                        logging.info("Deobfuscate renaming %s to %s", otherfile, new_name)
                        # Rename and make sure the new filename is unique
                        renamer(otherfile, new_name)
            else:
                logging.debug("%s excluded from deobfuscation based on size, extension or non-obfuscation", filename)
    else:
        # decode_par2() reported a match, so the heuristic renamer is skipped
        logging.info("No qualifying files found to deobfuscate")