#!/usr/bin/python3 -OO
# Copyright 2007-2021 The SABnzbd-Team <team@sabnzbd.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

"""
sabnzbd.encoding - Unicode/byte translation functions
"""

import locale
import chardet
from xml.sax.saxutils import escape
from typing import AnyStr

# Encoding reported by the locale; used as the fallback codec in platform_btou()
CODEPAGE = locale.getpreferredencoding()


def utob(str_in: AnyStr) -> bytes:
    """Return `str_in` as bytes.

    Bytes are handed back untouched, anything else is encoded as UTF-8.
    """
    return str_in if isinstance(str_in, bytes) else str_in.encode("utf-8")


def ubtou(str_in: AnyStr) -> str:
    """Return `str_in` as text.

    Bytes are decoded as UTF-8 (raising UnicodeDecodeError when they are
    not valid UTF-8), anything else is handed back untouched.
    """
    if isinstance(str_in, bytes):
        return str_in.decode("utf-8")
    return str_in


def platform_btou(str_in: AnyStr) -> str:
    """Turn process output into text, falling back on the locale encoding.

    Non-bytes input is returned as-is. Bytes are first decoded as UTF-8;
    when that fails they are decoded with CODEPAGE using errors="replace"
    and every "?" in the result is turned into "!".

    NOTE: Used for POpen because the universal_newlines/text parameter
    doesn't always work, and the encoding parameter cannot be used
    because it's Python 3.7+.
    """
    if not isinstance(str_in, bytes):
        return str_in

    try:
        return ubtou(str_in)
    except UnicodeDecodeError:
        # NOTE(review): errors="replace" marks undecodable bytes with U+FFFD,
        # so the "?" -> "!" step below only touches literal question marks.
        # Confirm whether that is the intended behaviour.
        lenient = str_in.decode(CODEPAGE, errors="replace")
        return lenient.replace("?", "!")


def correct_unknown_encoding(str_or_bytes_in: AnyStr) -> str:
    """Repair text whose encoding is unknown.

    Files created on Windows but unpacked/repaired on Linux can end up
    with invalid filenames. The input is brought back to bytes (using
    "surrogateescape" for text) and then decoded again, trying in order:
    UTF-8, ISO-8859-1 and finally whatever the chardet package detects.
    """
    if isinstance(str_or_bytes_in, bytes):
        raw = str_or_bytes_in
    else:
        # Text input: recover the underlying bytes first
        raw = str_or_bytes_in.encode("utf-8", "surrogateescape")

    try:
        # Cheapest option: plain UTF-8
        return ubtou(raw)
    except UnicodeDecodeError:
        try:
            # 8-bit encoding, in case the name came from Windows
            return raw.decode("ISO-8859-1")
        except ValueError:
            # NOTE(review): ISO-8859-1 maps every byte value to a character,
            # so this handler looks unreachable - confirm before relying on it.
            # Last resort: the slow chardet package
            detected = chardet.detect(raw)
            return raw.decode(detected["encoding"])


def xml_name(p):
    """Return str(p) escaped for use in an HTML/XML context"""
    text = str(p)
    return escape(text)