"""Minimal 'diff' command: compares two files or, with -r, two directories.

Supported options: -w, -b, -u, -U<n>, -r and --strip-trailing-cr.
The process exit code is 0 when the inputs are equal and 1 when they differ
or an error occurred.
"""

import difflib
import functools
import getopt
import io
import locale
import os
import sys

import util
from util import to_string


class DiffFlags():
    """Options selected on the command line for one 'diff' invocation."""

    def __init__(self):
        # -w: drop all whitespace from every line before comparing.
        self.ignore_all_space = False
        # -b: collapse each run of whitespace to a single space.
        self.ignore_space_change = False
        # -u / -U<n>: emit unified diffs instead of context diffs.
        self.unified_diff = False
        # Number of context lines around each change (set by -U<n>).
        self.num_context_lines = 3
        # -r: compare directory trees recursively.
        self.recursive_diff = False
        # --strip-trailing-cr: treat "\r\n" line endings as "\n".
        self.strip_trailing_cr = False


def getDirTree(path, basedir=""):
    """Return a sorted tree describing the file system entry `path`.

    A tree is a tuple `(name, child_trees)`. For a directory, `child_trees`
    is the sorted list of its children's trees (`[]` when it is empty); for
    a file it is `None`.

    `basedir` is joined in front of `path` to locate the entry on disk but is
    not stored in the tree, so names compare and sort on their own.

    A path that cannot be listed as a directory (a regular file, a missing
    path, ...) is reported as a file node; a later attempt to open it then
    fails or succeeds on its own terms.
    """
    # Only the top level of the walk is needed: children are handled by the
    # recursive calls below, each of which starts its own walk.
    top_level = next(os.walk(os.path.join(basedir, path)), None)
    if top_level is None:
        # os.walk yields nothing when `path` is not a listable directory.
        return path, None

    dirname, child_dirs, files = top_level
    child_trees = [getDirTree(child_dir, dirname) for child_dir in child_dirs]
    child_trees.extend((filename, None) for filename in files)
    return path, sorted(child_trees)


def compareTwoFiles(flags, filepaths):
    """Diff the two inputs named in `filepaths` and return the exit code.

    Each entry of `filepaths` is a file path, or "-" for standard input.
    The inputs are first compared as text in the preferred locale encoding,
    then as UTF-8, and finally as raw bytes when neither decoding works.

    Returns 0 when the inputs are equal and 1 when a difference was printed.
    Errors raised while opening or reading the inputs propagate to the caller.
    """
    filelines = []
    for path in filepaths:
        if path == "-":
            # Read a duplicate of the stdin descriptor in binary mode so that
            # closing it here leaves the real stdin open.
            with os.fdopen(os.dup(sys.stdin.fileno()), 'rb') as stdin_bin:
                filelines.append(stdin_bin.readlines())
        else:
            with open(path, 'rb') as file_bin:
                filelines.append(file_bin.readlines())

    try:
        return compareTwoTextFiles(flags, filepaths, filelines,
                                   locale.getpreferredencoding(False))
    except UnicodeDecodeError:
        try:
            return compareTwoTextFiles(flags, filepaths, filelines, "utf-8")
        except Exception:
            # Best-effort fallback: anything that prevents a text comparison
            # ends in a byte-wise comparison. KeyboardInterrupt and
            # SystemExit are deliberately not intercepted.
            return compareTwoBinaryFiles(flags, filepaths, filelines)


def compareTwoBinaryFiles(flags, filepaths, filelines):
    """Diff two inputs as raw bytes; return 1 if they differ, else 0.

    `filelines` holds the two inputs as lists of byte lines and `filepaths`
    the names printed in the diff header.
    """
    if hasattr(difflib, 'diff_bytes'):
        # python 3.5 or newer
        # NOTE(review): this branch always produces a unified diff, whatever
        # flags.unified_diff says; kept as is because existing callers may
        # depend on that output.
        diffs = difflib.diff_bytes(difflib.unified_diff, filelines[0],
                                   filelines[1], filepaths[0].encode(),
                                   filepaths[1].encode(),
                                   n=flags.num_context_lines)
        # Undecodable bytes are shown as backslash escapes instead of failing.
        diffs = [diff.decode(errors="backslashreplace") for diff in diffs]
    else:
        # python 2.7
        if flags.unified_diff:
            func = difflib.unified_diff
        else:
            func = difflib.context_diff
        diffs = func(filelines[0], filelines[1], filepaths[0], filepaths[1],
                     n=flags.num_context_lines)

    exitCode = 0
    for diff in diffs:
        sys.stdout.write(to_string(diff))
        exitCode = 1
    return exitCode


def compareTwoTextFiles(flags, filepaths, filelines_bin, encoding):
    """Diff two inputs as text; return 1 if they differ, else 0.

    `filelines_bin` holds the two inputs as lists of byte lines, which are
    decoded with `encoding`. Both inputs are fully decoded before anything is
    printed, so a UnicodeDecodeError never follows partial output.

    Lines are normalized according to `flags` before comparison, and the
    printed diff shows the normalized lines.
    """
    # Whitespace handling: -w removes it entirely, -b collapses each run to
    # one space; -w wins when both are given.
    if flags.ignore_all_space:
        separator = ''
    elif flags.ignore_space_change:
        separator = ' '
    else:
        separator = None

    def normalize(line):
        if flags.strip_trailing_cr:
            line = line.replace('\r\n', '\n')
        if separator is not None:
            # split() also swallows the line terminator, so put one back.
            line = separator.join(line.split()) + "\n"
        return line

    filelines = [
        [normalize(line_bin.decode(encoding=encoding)) for line_bin in lines_bin]
        for lines_bin in filelines_bin
    ]

    func = difflib.unified_diff if flags.unified_diff else difflib.context_diff
    exitCode = 0
    for diff in func(filelines[0], filelines[1], filepaths[0], filepaths[1],
                     n=flags.num_context_lines):
        sys.stdout.write(to_string(diff))
        exitCode = 1
    return exitCode


def printDirVsFile(dir_path, file_path):
    """Report that the left entry is a directory and the right one a file."""
    if os.path.getsize(file_path):
        msg = "File %s is a directory while file %s is a regular file"
    else:
        msg = "File %s is a directory while file %s is a regular empty file"
    sys.stdout.write(msg % (dir_path, file_path) + "\n")


def printFileVsDir(file_path, dir_path):
    """Report that the left entry is a file and the right one a directory."""
    if os.path.getsize(file_path):
        msg = "File %s is a regular file while file %s is a directory"
    else:
        msg = "File %s is a regular empty file while file %s is a directory"
    sys.stdout.write(msg % (file_path, dir_path) + "\n")


def printOnlyIn(basedir, path, name):
    """Report that `name` exists only in the directory `basedir`/`path`."""
    sys.stdout.write("Only in %s: %s\n" % (os.path.join(basedir, path), name))


def compareDirTrees(flags, dir_trees, base_paths=("", "")):
    """Recursively compare two trees built by getDirTree.

    `dir_trees` is a pair of trees and `base_paths` the pair of directories
    that contain them. Returns 0 when the trees are equal and 1 otherwise.
    """
    # Dirnames of the trees are not checked, it's caller's responsibility,
    # as top-level dirnames are always different. Base paths are important
    # for doing os.walk, but we don't put it into tree's dirname in order
    # to speed up string comparison below and while sorting in getDirTree.
    left_tree, right_tree = dir_trees[0], dir_trees[1]
    left_base, right_base = base_paths[0], base_paths[1]
    left_path = os.path.join(left_base, left_tree[0])
    right_path = os.path.join(right_base, right_tree[0])
    left_is_file = left_tree[1] is None
    right_is_file = right_tree[1] is None

    # Compare two files or report file vs. directory mismatch.
    if left_is_file and right_is_file:
        return compareTwoFiles(flags, [left_path, right_path])

    if left_is_file:
        printFileVsDir(left_path, right_path)
        return 1

    if right_is_file:
        printDirVsFile(left_path, right_path)
        return 1

    # Compare two directories via recursive use of compareDirTrees.
    exitCode = 0
    left_names = [node[0] for node in left_tree[1]]
    right_names = [node[0] for node in right_tree[1]]
    l, r = 0, 0
    while l < len(left_names) and r < len(right_names):
        # Names are sorted in getDirTree, rely on that order.
        if left_names[l] < right_names[r]:
            exitCode = 1
            printOnlyIn(left_base, left_tree[0], left_names[l])
            l += 1
        elif left_names[l] > right_names[r]:
            exitCode = 1
            printOnlyIn(right_base, right_tree[0], right_names[r])
            r += 1
        else:
            exitCode |= compareDirTrees(flags,
                                        [left_tree[1][l], right_tree[1][r]],
                                        [left_path, right_path])
            l += 1
            r += 1

    # At least one of the trees has ended. Report names from the other tree.
    for name in left_names[l:]:
        exitCode = 1
        printOnlyIn(left_base, left_tree[0], name)
    for name in right_names[r:]:
        exitCode = 1
        printOnlyIn(right_base, right_tree[0], name)
    return exitCode


def main(argv):
    """Parse `argv`, run the comparison and exit with its result.

    Exits with 0 when the inputs are equal, and with 1 when they differ, the
    command line is invalid, or an I/O error occurs.
    """
    if sys.platform == "win32":
        # Keep "\n" in the output from being translated on Windows.
        if hasattr(sys.stdout, 'buffer'):
            # python 3
            sys.stdout = io.TextIOWrapper(sys.stdout.buffer, newline='\n')
        else:
            # python 2.7
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)

    try:
        opts, args = getopt.gnu_getopt(argv[1:], "wbuU:r",
                                       ["strip-trailing-cr"])
    except getopt.GetoptError as err:
        sys.stderr.write("Unsupported: 'diff': %s\n" % str(err))
        sys.exit(1)

    flags = DiffFlags()
    for o, a in opts:
        if o == "-w":
            flags.ignore_all_space = True
        elif o == "-b":
            flags.ignore_space_change = True
        elif o == "-u":
            flags.unified_diff = True
        elif o.startswith("-U"):
            flags.unified_diff = True
            try:
                flags.num_context_lines = int(a)
                if flags.num_context_lines < 0:
                    raise ValueError
            except ValueError:
                sys.stderr.write("Error: invalid '-U' argument: {}\n"
                                 .format(a))
                sys.exit(1)
        elif o == "-r":
            flags.recursive_diff = True
        elif o == "--strip-trailing-cr":
            flags.strip_trailing_cr = True
        else:
            assert False, "unhandled option"

    if len(args) != 2:
        sys.stderr.write("Error: missing or extra operand\n")
        sys.exit(1)

    filepaths, dir_trees = [], []
    exitCode = 0
    try:
        for path in args:
            if path != "-" and not os.path.isabs(path):
                path = os.path.realpath(os.path.join(os.getcwd(), path))

            if flags.recursive_diff:
                if path == "-":
                    sys.stderr.write("Error: cannot recursively compare '-'\n")
                    sys.exit(1)
                dir_trees.append(getDirTree(path))
            else:
                filepaths.append(path)

        if flags.recursive_diff:
            exitCode = compareDirTrees(flags, dir_trees)
        else:
            exitCode = compareTwoFiles(flags, filepaths)

    except IOError as err:
        sys.stderr.write("Error: 'diff' command failed, %s\n" % str(err))
        exitCode = 1

    sys.exit(exitCode)


if __name__ == "__main__":
    main(sys.argv)