# patch.py -- For dealing with packed-style patches.
# Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Classes for dealing with git am-style patches.

These patches are basically unified diffs with some extra metadata tacked
on.
"""

from difflib import SequenceMatcher
import email.parser
import time

from dulwich.objects import (
    Blob,
    Commit,
    S_ISGITLINK,
    )

FIRST_FEW_BYTES = 8000


def write_commit_patch(f, commit, contents, progress, version=None,
                       encoding=None):
    """Write a single commit as a git-am-style patch.

    Args:
      f: Binary file-like object to write the patch to
      commit: Commit object
      contents: Diff contents; a str is encoded with ``encoding`` first
      progress: Tuple with current patch number and total.
      version: Version string for the trailer; when None the Dulwich
        version is written instead
      encoding: Encoding for the generated header text; defaults to
        ``f.encoding`` and finally to ASCII
    """
    # f.encoding may exist but be None, so apply the ASCII fallback last.
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    if isinstance(contents, str):
        contents = contents.encode(encoding)
    (num, total) = progress
    f.write(b"From " + commit.id + b" " +
            time.ctime(commit.commit_time).encode(encoding) + b"\n")
    f.write(b"From: " + commit.author + b"\n")
    # NOTE(review): strftime() without a time tuple formats the current
    # local time, not the commit's timestamp -- confirm this is intended.
    f.write(b"Date: " +
            time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n")
    f.write(("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding) +
            commit.message + b"\n")
    f.write(b"\n")
    f.write(b"---\n")
    try:
        import subprocess
        p = subprocess.Popen(["diffstat"], stdout=subprocess.PIPE,
                             stdin=subprocess.PIPE)
    except (ImportError, OSError):
        pass  # diffstat not available?
    else:
        (diffstat, _) = p.communicate(contents)
        f.write(diffstat)
        f.write(b"\n")
    f.write(contents)
    f.write(b"-- \n")
    if version is None:
        from dulwich import __version__ as dulwich_version
        f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
    else:
        f.write(version.encode(encoding) + b"\n")


def get_summary(commit):
    """Determine the summary line for use in a filename.

    Args:
      commit: Commit
    Returns: Summary string; the first line of the commit message with
      spaces replaced by dashes, or an empty string for an empty message
    """
    decoded = commit.message.decode(errors='replace')
    message_lines = decoded.splitlines()
    if not message_lines:
        # An empty message has no first line to summarize.
        return ""
    return message_lines[0].replace(" ", "-")


#  Unified Diff
def _format_range_unified(start, stop):
    """Convert a (start, stop) line range to the "ed" format."""
    # Per the diff spec at http://www.unix.org/single_unix_specification/
    beginning = start + 1  # lines start numbering with one
    length = stop - start
    if length == 1:
        return '{}'.format(beginning)
    if not length:
        beginning -= 1  # empty ranges begin at line just before the range
    return '{},{}'.format(beginning, length)


def _as_bytes(value, encoding='utf-8'):
    """Return value as bytes: bytes pass through, str is encoded."""
    if isinstance(value, bytes):
        return value
    return value.encode(encoding)


def _with_newline_marker(line):
    """Terminate a diff line, flagging a missing trailing newline."""
    if line[-1:] != b'\n':
        return line + b'\n\\ No newline at end of file\n'
    return line


def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                 tofiledate='', n=3, lineterm='\n'):
    """difflib.unified_diff that can detect "No newline at end of file" as
    original "git diff" does.

    Based on the same function in Python2.7 difflib.py

    Args:
      a: List of lines (bytes, including line endings) of the old file
      b: List of lines (bytes, including line endings) of the new file
      fromfile: Old file name, as bytes or str
      tofile: New file name, as bytes or str
      fromfiledate: Optional date appended to the old file header
      tofiledate: Optional date appended to the new file header
      n: Number of context lines
      lineterm: Terminator for the generated header lines
    Returns: Iterator over the lines (bytes) of the diff
    """
    line_end = _as_bytes(lineterm)
    started = False
    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
        if not started:
            started = True
            fromdate = (
                '\t{}'.format(fromfiledate).encode('utf-8')
                if fromfiledate else b'')
            todate = (
                '\t{}'.format(tofiledate).encode('utf-8')
                if tofiledate else b'')
            # File names are kept as raw bytes so non-ASCII paths are
            # written through instead of failing an ASCII decode.
            yield b'--- ' + _as_bytes(fromfile) + fromdate + line_end
            yield b'+++ ' + _as_bytes(tofile) + todate + line_end

        first, last = group[0], group[-1]
        file1_range = _format_range_unified(first[1], last[2])
        file2_range = _format_range_unified(first[3], last[4])
        yield '@@ -{} +{} @@'.format(
            file1_range,
            file2_range
            ).encode('ascii') + line_end

        for tag, i1, i2, j1, j2 in group:
            if tag == 'equal':
                for line in a[i1:i2]:
                    # A context line can be the unterminated last line of
                    # both files; it needs the marker as well, otherwise
                    # whatever is written next runs onto the same line.
                    yield b' ' + _with_newline_marker(line)
                continue
            if tag in ('replace', 'delete'):
                for line in a[i1:i2]:
                    yield b'-' + _with_newline_marker(line)
            if tag in ('replace', 'insert'):
                for line in b[j1:j2]:
                    yield b'+' + _with_newline_marker(line)


def is_binary(content):
    """See if the first few bytes contain any null characters.

    Args:
      content: Bytestring to check for binary content
    Returns: True if a NUL byte occurs within the first FIRST_FEW_BYTES
      bytes
    """
    return b'\0' in content[:FIRST_FEW_BYTES]


def shortid(hexsha):
    """Abbreviate a hex sha to 7 characters; None becomes seven zeros."""
    if hexsha is None:
        return b"0" * 7
    else:
        return hexsha[:7]


def patch_filename(p, root):
    """Return the path as shown in a diff header.

    Args:
      p: Path as bytes, or None for a nonexistent file
      root: Prefix (such as b"a" or b"b") to put in front of the path
    Returns: ``root + b"/" + p``, or b"/dev/null" if p is None
    """
    if p is None:
        return b"/dev/null"
    else:
        return root + b"/" + p


def write_object_diff(f, store, old_file, new_file, diff_binary=False):
    """Write the diff for an object.

    Args:
      f: File-like object to write to
      store: Store to retrieve objects from, if necessary
      old_file: (path, mode, hexsha) tuple
      new_file: (path, mode, hexsha) tuple
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().

    Note: the tuple elements should be None for nonexistent files
    """
    (old_path, old_mode, old_id) = old_file
    (new_path, new_mode, new_id) = new_file
    patched_old_path = patch_filename(old_path, b"a")
    patched_new_path = patch_filename(new_path, b"b")

    def content(mode, hexsha):
        # Missing files diff as empty blobs; gitlinks diff as a one-line
        # "Subproject commit" pseudo-file; anything else comes from store.
        if hexsha is None:
            return Blob.from_string(b'')
        elif S_ISGITLINK(mode):
            return Blob.from_string(b"Subproject commit " + hexsha + b"\n")
        else:
            return store[hexsha]

    def lines(blob):
        if not blob:
            return []
        else:
            return blob.splitlines()
    f.writelines(gen_diff_header(
        (old_path, new_path), (old_mode, new_mode), (old_id, new_id)))
    old_content = content(old_mode, old_id)
    new_content = content(new_mode, new_id)
    if not diff_binary and (
            is_binary(old_content.data) or is_binary(new_content.data)):
        binary_diff = (
            b"Binary files "
            + patched_old_path
            + b" and "
            + patched_new_path
            + b" differ\n"
        )
        f.write(binary_diff)
    else:
        f.writelines(unified_diff(lines(old_content), lines(new_content),
                                  patched_old_path, patched_new_path))


# TODO(jelmer): Support writing unicode, rather than bytes.
def gen_diff_header(paths, modes, shas):
    """Write a blob diff header.

    Args:
      paths: Tuple with old and new path
      modes: Tuple with old and new modes
      shas: Tuple with old and new shas
    Returns: Iterator over the header chunks (bytes)
    """
    (old_path, new_path) = paths
    (old_mode, new_mode) = modes
    (old_sha, new_sha) = shas
    # For an added or deleted file, use the one known path on both sides
    # of the "diff --git" line.
    if old_path is None and new_path is not None:
        old_path = new_path
    if new_path is None and old_path is not None:
        new_path = old_path
    old_path = patch_filename(old_path, b"a")
    new_path = patch_filename(new_path, b"b")
    yield b"diff --git " + old_path + b" " + new_path + b"\n"

    if old_mode != new_mode:
        if new_mode is not None:
            if old_mode is not None:
                yield ("old file mode %o\n" % old_mode).encode('ascii')
            yield ("new file mode %o\n" % new_mode).encode('ascii')
        else:
            yield ("deleted file mode %o\n" % old_mode).encode('ascii')
    yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
    if new_mode is not None and old_mode is not None:
        yield (" %o" % new_mode).encode('ascii')
    yield b"\n"


# TODO(jelmer): Support writing unicode, rather than bytes.
def write_blob_diff(f, old_file, new_file):
    """Write blob diff.

    Args:
      f: File-like object to write to
      old_file: (path, mode, hexsha) tuple (None if nonexisting)
      new_file: (path, mode, hexsha) tuple (None if nonexisting)

    Note: The use of write_object_diff is recommended over this function.
    """
    (old_path, old_mode, old_blob) = old_file
    (new_path, new_mode, new_blob) = new_file
    patched_old_path = patch_filename(old_path, b"a")
    patched_new_path = patch_filename(new_path, b"b")

    def lines(blob):
        if blob is not None:
            return blob.splitlines()
        else:
            return []
    f.writelines(gen_diff_header(
        (old_path, new_path), (old_mode, new_mode),
        (getattr(old_blob, "id", None), getattr(new_blob, "id", None))))
    old_contents = lines(old_blob)
    new_contents = lines(new_blob)
    f.writelines(unified_diff(old_contents, new_contents,
                              patched_old_path, patched_new_path))


def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
    """Write tree diff.

    Args:
      f: File-like object to write to.
      store: Object store; supplies the tree changes and the file contents
      old_tree: Old tree id
      new_tree: New tree id
      diff_binary: Whether to diff files even if they
        are considered binary files by is_binary().
    """
    changes = store.tree_changes(old_tree, new_tree)
    for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
        write_object_diff(f, store, (oldpath, oldmode, oldsha),
                          (newpath, newmode, newsha), diff_binary=diff_binary)


def git_am_patch_split(f, encoding=None):
    """Parse a git-am-style patch and split it up into bits.

    Args:
      f: File-like object to parse
      encoding: Encoding to use when creating Git objects
    Returns: Tuple with commit object, diff contents and git version
    """
    # f.encoding may exist but be None, so apply the ASCII fallback last.
    encoding = encoding or getattr(f, "encoding", None) or "ascii"
    contents = f.read()
    if (isinstance(contents, bytes) and
            getattr(email.parser, "BytesParser", None)):
        parser = email.parser.BytesParser()
        msg = parser.parsebytes(contents)
    else:
        parser = email.parser.Parser()
        msg = parser.parsestr(contents)
    return parse_patch_message(msg, encoding)


def parse_patch_message(msg, encoding=None):
    """Extract a Commit object and patch from an e-mail message.

    Args:
      msg: An email message (email.message.Message)
      encoding: Encoding to use to encode Git commits; defaults to ASCII
    Returns: Tuple with commit object, diff contents and git version
    """
    # str.encode(None) raises TypeError, so the default needs a real codec.
    encoding = encoding or "ascii"
    c = Commit()
    c.author = msg["from"].encode(encoding)
    c.committer = msg["from"].encode(encoding)
    subject = msg["subject"]
    try:
        patch_tag_start = subject.index("[PATCH")
        close = subject.index("] ", patch_tag_start)
    except ValueError:
        # No (complete) "[PATCH ...] " tag: use the subject as it is.
        pass
    else:
        subject = subject[close+2:]
    message_parts = [(subject.replace("\n", "") + "\n").encode(encoding)]
    first = True

    body = msg.get_payload(decode=True)
    lines = body.splitlines(True)
    line_iter = iter(lines)

    # Everything up to the "---" separator belongs to the commit message.
    for line in line_iter:
        if line == b"---\n":
            break
        if first:
            # A leading in-body "From: " line overrides the mail's author.
            if line.startswith(b"From: "):
                c.author = line[len(b"From: "):].rstrip()
            else:
                message_parts.append(b"\n" + line)
            first = False
        else:
            message_parts.append(line)
    c.message = b"".join(message_parts)

    # The diff runs from the separator to the "-- " signature marker.
    diff_parts = []
    for line in line_iter:
        if line == b"-- \n":
            break
        diff_parts.append(line)
    diff = b"".join(diff_parts)
    try:
        version = next(line_iter).rstrip(b"\n")
    except StopIteration:
        version = None
    return c, diff, version