# -*- coding: utf-8 -*-
#
# Picard, the next-generation MusicBrainz tagger
#
# Copyright (C) 2013-2014 Ionuț Ciocîrlan
# Copyright (C) 2013-2014, 2018-2019 Laurent Monin
# Copyright (C) 2014 Michael Wiencek
# Copyright (C) 2017 Sambhav Kothari
# Copyright (C) 2017 Ville Skyttä
# Copyright (C) 2018 Antonio Larrosa
# Copyright (C) 2019-2020 Philipp Wolfer
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.


import math
import os
import re
import shutil
import struct
import sys
import unicodedata

from PyQt5.QtCore import QStandardPaths

from picard import log
from picard.const.sys import (
    IS_LINUX,
    IS_MACOS,
    IS_WIN,
)
from picard.util import (
    _io_encoding,
    decode_filename,
    encode_filename,
    samefile,
)


win32api = None
if IS_WIN:
    try:
        import win32api  # isort:skip
        import pywintypes
    except ImportError as e:
        log.warning('pywin32 not available: %s', e)


def _get_utf16_length(text):
    """Returns the number of 16-bit code units used by a unicode object in
    its UTF-16 representation (characters outside the BMP count twice).

    For a bytes object the plain byte length is returned.
    """
    if isinstance(text, bytes):
        return len(text)
    # if this is a narrow Python build, len will in fact return exactly
    # what we're looking for
    if sys.maxunicode == 0xFFFF:
        return len(text)
    # otherwise, encode the string in UTF-16 using the system's endianness,
    # and divide the resulting length by 2
    return len(text.encode("utf-16%ce" % sys.byteorder[0])) // 2


def _shorten_to_utf16_length(text, length):
    """Truncates a str object to the given number of UTF-16 code units.

    A surrogate pair is never split: if the cut would fall between the two
    halves of a pair, the whole character is dropped.
    """
    assert isinstance(text, str), "This function only works on unicode"
    # if this is a narrow Python build, regular slicing will do exactly
    # what we're looking for
    if sys.maxunicode == 0xFFFF:
        shortened = text[:length]
        # before returning, we need to check if we didn't cut in the middle
        # of a surrogate pair
        last = shortened[-1:]
        if last and 0xD800 <= ord(last) <= 0xDBFF:
            # it's a leading surrogate alright
            return shortened[:-1]
        # else...
        return shortened
    # otherwise, encode the string in UTF-16 using the system's endianness,
    # and shorten by twice the length
    enc = "utf-16%ce" % sys.byteorder[0]
    shortened = text.encode(enc)[:length * 2]
    # if we hit a surrogate pair, get rid of the last codepoint
    last = shortened[-2:]
    if last and 0xD800 <= struct.unpack("=H", last)[0] <= 0xDBFF:
        shortened = shortened[:-2]
    return shortened.decode(enc)


def _shorten_to_utf16_nfd_length(text, length):
    """Truncates a str object so that its NFD form fits in the given number
    of UTF-16 code units, and returns the result NFC-normalized.

    The cut never separates a base character from its combining marks: if
    the first cut-off character is a combining one, the whole sequence it
    belongs to is removed.
    """
    text = unicodedata.normalize('NFD', text)
    newtext = _shorten_to_utf16_length(text, length)
    # As long as the first cut-off character is a combining one, we are in
    # the middle of a "base + combining marks" sequence: keep dropping our
    # last character until the cut sits right in front of a base character.
    # (Dropping only one would leave a base with just some of its marks.)
    while (newtext and len(newtext) < len(text)
           and unicodedata.combining(text[len(newtext)])):
        newtext = newtext[:-1]
    return unicodedata.normalize('NFC', newtext)


_re_utf8 = re.compile(r'^utf([-_]?8)$', re.IGNORECASE)
def _shorten_to_bytes_length(text, length):  # noqa: E302
    """Truncates a unicode object to the given number of bytes it would take
    when encoded in the "filesystem encoding".
    """
    assert isinstance(text, str), "This function only works on unicode"
    raw = encode_filename(text)
    # maybe there's no need to truncate anything
    if len(raw) <= length:
        return text
    # or maybe there's nothing multi-byte here
    if len(raw) == len(text):
        return text[:length]
    # if we're dealing with utf-8, we can use an efficient algorithm
    # to deal with character boundaries
    if _re_utf8.match(_io_encoding):
        i = length
        # a UTF-8 intermediate byte starts with the bits 10xxxxxx,
        # so ord(char) & 0b11000000 = 0b10000000
        # (raw[i] is always valid here, since len(raw) > length)
        while i > 0 and (raw[i] & 0xC0) == 0x80:
            i -= 1
        return decode_filename(raw[:i])
    # finally, a brute force approach
    i = length
    while i > 0:
        try:
            return decode_filename(raw[:i])
        except UnicodeDecodeError:
            pass
        i -= 1
    # hmm. we got here?
    return ""


SHORTEN_BYTES, SHORTEN_UTF16, SHORTEN_UTF16_NFD = 0, 1, 2
def shorten_filename(filename, length, mode):  # noqa: E302
    """Truncates a filename to the given number of thingies,
    as implied by `mode`.

    filename: The name to truncate; bytes objects are simply sliced.
    length: Maximum number of bytes / UTF-16 code units to keep.
    mode: One of SHORTEN_BYTES, SHORTEN_UTF16, SHORTEN_UTF16_NFD.

    Raises ValueError if `mode` is not one of the known modes.
    """
    if isinstance(filename, bytes):
        return filename[:length]
    if mode == SHORTEN_BYTES:
        return _shorten_to_bytes_length(filename, length)
    if mode == SHORTEN_UTF16:
        return _shorten_to_utf16_length(filename, length)
    if mode == SHORTEN_UTF16_NFD:
        return _shorten_to_utf16_nfd_length(filename, length)
    # don't silently hand back None for a mode we don't know about
    raise ValueError("Unknown shorten mode: %r" % (mode,))


def shorten_path(path, length, mode):
    """Reduce path nodes' length to given limit(s).

    path: Absolute or relative path to shorten.
    length: Maximum number of code points / bytes allowed in a node.
    mode: One of SHORTEN_BYTES, SHORTEN_UTF16, SHORTEN_UTF16_NFD.

    Every node is stripped of surrounding whitespace after truncation. The
    file extension is always kept in full; only the part of the filename in
    front of it is truncated.
    """
    def shorten(name, length):
        if not name:
            return ""
        return shorten_filename(name, length, mode).strip() or ""
    dirpath, filename = os.path.split(path)
    fileroot, ext = os.path.splitext(filename)
    # The extension eats into the filename's budget. Never let the budget go
    # negative: a negative slice bound would cut characters off the *end*
    # instead of producing an empty root.
    fileroot_length = max(0, length - len(ext))
    return os.path.join(
        os.path.join(*[shorten(node, length)
                       for node in dirpath.split(os.path.sep)]),
        shorten(fileroot, fileroot_length) + ext
    )


def _shorten_to_utf16_ratio(text, ratio):
    """Shortens the string to the given ratio (and strips it).

    The result keeps at least one code unit (before stripping), whatever
    the ratio is.
    """
    length = _get_utf16_length(text)
    limit = max(1, int(math.floor(length / ratio)))
    if isinstance(text, bytes):
        return text[:limit].strip()
    else:
        return _shorten_to_utf16_length(text, limit).strip()


class WinPathTooLong(OSError):
    """Raised when a path cannot be made short enough for Windows."""
    pass


def _make_win_short_filename(relpath, reserved=0):
    r"""Shorten a relative file path according to WinAPI quirks.

    relpath: The file's path.
    reserved: Number of characters reserved for the parent path to be joined with,
              e.g. 3 if it will be joined with "X:\", respectively 5 for "X:\y\".
              (note the inclusion of the final backslash)

    Raises WinPathTooLong if not even single-character directory names
    would fit in the space left after `reserved`.
    """
    # See:
    # http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx
    #
    # The MAX_PATH is 260 characters, with this possible format for a file:
    # "X:\<244-char dir path>\<11-char filename><NUL>".

    # Our constraints:
    # the entire path's length
    MAX_FILEPATH_LEN = 259
    # the entire parent directory path's length, *excluding* the final separator
    MAX_DIRPATH_LEN = 247
    # a single node's length (this seems to be the case for older NTFS)
    MAX_NODE_LEN = 226

    # to make predictable directory paths we need to fit the directories in
    # MAX_DIRPATH_LEN, and truncate the filename to whatever's left
    remaining = MAX_DIRPATH_LEN - reserved

    # to make things more readable...
    def shorten(path, length):
        return shorten_path(path, length, mode=SHORTEN_UTF16)
    xlength = _get_utf16_length

    # shorten to MAX_NODE_LEN from the beginning
    relpath = shorten(relpath, MAX_NODE_LEN)
    dirpath, filename = os.path.split(relpath)
    # what if dirpath is already the right size?
    dplen = xlength(dirpath)
    if dplen <= remaining:
        filename_max = MAX_FILEPATH_LEN - (reserved + dplen + 1)  # the final separator
        filename = shorten(filename, filename_max)
        return os.path.join(dirpath, filename)

    # compute the directory path and the maximum number of characters
    # in a filename, and cache them
    try:
        computed = _make_win_short_filename._computed
    except AttributeError:
        computed = _make_win_short_filename._computed = {}
    try:
        finaldirpath, filename_max = computed[(dirpath, reserved)]
    except KeyError:
        dirnames = dirpath.split(os.path.sep)
        # allocate space for the separators,
        # but don't include the final one
        remaining -= len(dirnames) - 1
        # make sure we can have at least single-character dirnames
        average = float(remaining) / len(dirnames)
        if average < 1:
            raise WinPathTooLong(
                "Path too long. "
                "You need to move renamed files to a different directory."
            )

        # try to reduce directories exceeding average with a ratio proportional
        # to how much they exceed with; if not possible, reduce all dirs
        # proportionally to their initial length
        shortdirnames = [dn for dn in dirnames if len(dn) <= average]
        totalchars = sum(map(xlength, dirnames))
        shortdirchars = sum(map(xlength, shortdirnames))

        # do we have at least 1 char for longdirs?
        if remaining > shortdirchars + len(dirnames) - len(shortdirnames):
            ratio = float(totalchars - shortdirchars) / (remaining - shortdirchars)
            for i, dn in enumerate(dirnames):
                if len(dn) > average:
                    dirnames[i] = _shorten_to_utf16_ratio(dn, ratio)
        else:
            ratio = float(totalchars) / remaining
            dirnames = [_shorten_to_utf16_ratio(dn, ratio) for dn in dirnames]

        # here it is:
        finaldirpath = os.path.join(*dirnames)

        # did we win back some chars from .floor()s and .strip()s?
        recovered = remaining - sum(map(xlength, dirnames))
        # so how much do we have left for the filename?
        filename_max = MAX_FILEPATH_LEN - MAX_DIRPATH_LEN - 1 + recovered
        #                                                   ^ the final separator

        # and don't forget to cache
        computed[(dirpath, reserved)] = (finaldirpath, filename_max)

    # finally...
    filename = shorten(filename, filename_max)
    return os.path.join(finaldirpath, filename)


def _get_mount_point(target):
    """Finds the target's mountpoint.

    Walks up from `target` until os.path.ismount() is true (or the path
    becomes empty). Results are cached per target on the function object.
    """
    # and caches it for future lookups
    try:
        mounts = _get_mount_point._mounts
    except AttributeError:
        mounts = _get_mount_point._mounts = {}
    try:
        mount = mounts[target]
    except KeyError:
        mount = target
        while mount and not os.path.ismount(mount):
            mount = os.path.dirname(mount)
        mounts[target] = mount
    return mount


# NOTE: this could be merged with the function above, and get all needed info
# in a single call, returning the filesystem type as well. (but python's
(but python's 311# posix.statvfs_result doesn't implement f_fsid) 312def _get_filename_limit(target): 313 """Finds the maximum filename length under the given directory.""" 314 # and caches it 315 try: 316 limits = _get_filename_limit._limits 317 except AttributeError: 318 limits = _get_filename_limit._limits = {} 319 try: 320 limit = limits[target] 321 except KeyError: 322 # we need to call statvfs on an existing target 323 d = target 324 while not os.path.exists(d): 325 d = os.path.dirname(d) 326 # XXX http://bugs.python.org/issue18695 327 try: 328 limit = os.statvfs(d).f_namemax 329 except UnicodeEncodeError: 330 limit = os.statvfs(d.encode(_io_encoding)).f_namemax 331 limits[target] = limit 332 return limit 333 334 335def make_short_filename(basedir, relpath, win_compat=False, relative_to=""): 336 """Shorten a filename's path to proper limits. 337 338 basedir: Absolute path of the base directory where files will be moved. 339 relpath: File path, relative from the base directory. 340 win_compat: Windows is quirky. 341 relative_to: An ancestor directory of basedir, against which win_compat 342 will be applied. 343 """ 344 # only deal with absolute paths. it saves a lot of grief, 345 # and is the right thing to do, even for renames. 
346 try: 347 basedir = os.path.abspath(basedir) 348 except FileNotFoundError: 349 # os.path.abspath raises an exception if basedir is a relative path and 350 # cwd doesn't exist anymore 351 basedir = QStandardPaths.writableLocation(QStandardPaths.MusicLocation) 352 # also, make sure the relative path is clean 353 relpath = os.path.normpath(relpath) 354 if win_compat and relative_to: 355 relative_to = os.path.abspath(relative_to) 356 assert basedir.startswith(relative_to) and \ 357 basedir.split(relative_to)[1][:1] in (os.path.sep, ''), \ 358 "`relative_to` must be an ancestor of `basedir`" 359 # always strip the relpath parts 360 relpath = os.path.join(*[part.strip() for part in relpath.split(os.path.sep)]) 361 # if we're on windows, delegate the work to a windows-specific function 362 if IS_WIN: 363 reserved = len(basedir) 364 if not basedir.endswith(os.path.sep): 365 reserved += 1 366 return _make_win_short_filename(relpath, reserved) 367 # if we're being windows compatible, figure out how much 368 # needs to be reserved for the basedir part 369 if win_compat: 370 # if a relative ancestor wasn't provided, 371 # use the basedir's mount point 372 if not relative_to: 373 relative_to = _get_mount_point(basedir) 374 # if it's root, presume the parent will be copied over 375 # to windows, and hope for the best 376 if relative_to == os.path.sep: 377 relative_to = os.path.dirname(basedir) 378 reserved = len(basedir) - len(relative_to) + 3 + 1 379 # the drive name ^ + ^ the final separator 380 relpath = _make_win_short_filename(relpath, reserved) 381 # on *nix we can consider there is no path limit, but there is 382 # a filename length limit. 383 if IS_MACOS: 384 # on OS X (i.e. 
HFS+), this is expressed in UTF-16 code points, 385 # in NFD normalization form 386 relpath = shorten_path(relpath, 255, mode=SHORTEN_UTF16_NFD) 387 else: 388 # on everything else the limit is expressed in bytes, 389 # and filesystem-dependent 390 limit = _get_filename_limit(basedir) 391 relpath = shorten_path(relpath, limit, mode=SHORTEN_BYTES) 392 return relpath 393 394 395def samefile_different_casing(path1, path2): 396 """Returns True if path1 and path2 refer to the same file, but differ in casing of the filename. 397 Returns False if path1 and path2 refer to different files or there case is identical. 398 """ 399 path1 = os.path.normpath(path1) 400 path2 = os.path.normpath(path2) 401 if path1 == path2 or not os.path.exists(path1) or not os.path.exists(path2): 402 return False 403 dir1 = os.path.realpath(os.path.normcase(os.path.dirname(path1))) 404 dir2 = os.path.realpath(os.path.normcase(os.path.dirname(path2))) 405 if dir1 != dir2 or not samefile(path1, path2): 406 return False 407 file1 = os.path.basename(path1) 408 file2 = os.path.basename(path2) 409 return file1 != file2 and file1.lower() == file2.lower() 410 411 412def _make_unique_temp_name(target_path): 413 i = 0 414 target_dir = os.path.dirname(target_path) 415 target_filename = os.path.basename(target_path) 416 while True: 417 # Attempt to get a non-existant temporary name for the file 418 # without changing path length. 419 temp_filename = '.%s%02d' % (target_filename[:-3], i) 420 temp_path = os.path.join(target_dir, temp_filename) 421 if not os.path.exists(temp_path): 422 return temp_path 423 i += 1 424 425 426def _move_force_rename(source_path, target_path): 427 """Moves a file by renaming it first to a temporary name. 428 Ensure file casing changes on system's not natively supporting this. 
429 """ 430 temp_path = _make_unique_temp_name(target_path) 431 shutil.move(source_path, temp_path) 432 os.rename(temp_path, target_path) 433 434 435def move_ensure_casing(source_path, target_path): 436 """Moves a file from source_path to target_path. 437 If the move would result just in the name changing the case apply workarounds 438 for Linux and Windows to ensure the case change is applied on case-insensitive 439 file systems. Otherwise use shutil.move to move the file. 440 """ 441 source_path = os.path.normpath(source_path) 442 target_path = os.path.normpath(target_path) 443 if source_path == target_path: 444 return 445 # Special handling is only required if both paths refer to the same file 446 # but the file name differs in casing. 447 # Also macOS does allow renaming only the casing and does not need special 448 # handling. 449 if not IS_MACOS and samefile_different_casing(source_path, target_path): 450 if IS_LINUX: 451 # On Linux always force a double move 452 _move_force_rename(source_path, target_path) 453 return 454 elif IS_WIN and win32api: 455 # Windows supports case renaming for NTFS and SMB shares, but not 456 # on FAT32 or exFAT file systems. Perform a normal move first, 457 # then check the result. 458 shutil.move(source_path, target_path) 459 try: 460 # Get the path in the actual casing as stored on disk 461 actual_path = win32api.GetLongPathNameW(win32api.GetShortPathName(target_path)) 462 if samefile_different_casing(target_path, actual_path): 463 _move_force_rename(source_path, target_path) 464 except pywintypes.error: 465 pass 466 return 467 # Just perform a normal move 468 try: 469 shutil.move(source_path, target_path) 470 except shutil.SameFileError: 471 # Sometimes different paths refer to the same file (e.g. network path / local path on Windows) 472 pass 473