# protocol.py -- Shared parts of the git protocols
# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Generic functions for talking the git smart server protocol."""

from io import BytesIO
from os import (
    SEEK_END,
    )
import socket

import dulwich
from dulwich.errors import (
    HangupException,
    GitProtocolError,
    )

# Default TCP port of the git:// protocol.
TCP_GIT_PORT = 9418

# Object id consisting of forty ASCII zeros.
ZERO_SHA = b"0" * 40

# Acknowledgement modes; see ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# pack data
SIDE_BAND_CHANNEL_DATA = 1
# progress messages
SIDE_BAND_CHANNEL_PROGRESS = 2
# fatal error message just before stream aborts
SIDE_BAND_CHANNEL_FATAL = 3

# Capability names as they appear on the wire.
CAPABILITY_DEEPEN_SINCE = b'deepen-since'
CAPABILITY_DEEPEN_NOT = b'deepen-not'
CAPABILITY_DEEPEN_RELATIVE = b'deepen-relative'
CAPABILITY_DELETE_REFS = b'delete-refs'
CAPABILITY_INCLUDE_TAG = b'include-tag'
CAPABILITY_MULTI_ACK = b'multi_ack'
CAPABILITY_MULTI_ACK_DETAILED = b'multi_ack_detailed'
CAPABILITY_NO_DONE = b'no-done'
CAPABILITY_NO_PROGRESS = b'no-progress'
CAPABILITY_OFS_DELTA = b'ofs-delta'
CAPABILITY_QUIET = b'quiet'
CAPABILITY_REPORT_STATUS = b'report-status'
CAPABILITY_SHALLOW = b'shallow'
CAPABILITY_SIDE_BAND = b'side-band'
CAPABILITY_SIDE_BAND_64K = b'side-band-64k'
CAPABILITY_THIN_PACK = b'thin-pack'
CAPABILITY_AGENT = b'agent'
CAPABILITY_SYMREF = b'symref'

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b'capabilities^{}'

# Capabilities shared by the upload and receive capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS]
KNOWN_UPLOAD_CAPABILITIES = set(COMMON_CAPABILITIES + [
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    ])
KNOWN_RECEIVE_CAPABILITIES = set(COMMON_CAPABILITIES + [
    CAPABILITY_REPORT_STATUS])


def agent_string():
    """Return the agent identifier for this dulwich version.

    :return: ``b'dulwich/X.Y.Z'`` built from ``dulwich.__version__``.
    """
    return ('dulwich/%d.%d.%d' % dulwich.__version__).encode('ascii')


def capability_agent():
    """Return the ``agent=<agent string>`` capability as bytes."""
    return CAPABILITY_AGENT + b'=' + agent_string()


def capability_symref(from_ref, to_ref):
    """Return a ``symref=<from_ref>:<to_ref>`` capability as bytes.

    :param from_ref: Name of the symbolic ref, as bytes.
    :param to_ref: Name of the ref it points at, as bytes.
    """
    return CAPABILITY_SYMREF + b'=' + from_ref + b':' + to_ref


def extract_capability_names(capabilities):
    """Return the set of capability names, with any ``=value`` part dropped.

    :param capabilities: Iterable of capability bytestrings.
    """
    return set(parse_capability(c)[0] for c in capabilities)


def parse_capability(capability):
    """Split a capability into its name and optional value.

    :param capability: Capability bytestring, either ``name`` or
        ``name=value``.
    :return: Tuple of (name, value); value is None when there is no ``=``.
    """
    parts = capability.split(b'=', 1)
    if len(parts) == 1:
        return (parts[0], None)
    return tuple(parts)


def symref_capabilities(symrefs):
    """Build one symref capability per (from_ref, to_ref) pair.

    :param symrefs: Iterable of (from_ref, to_ref) pairs.
    :return: List of capability bytestrings.
    """
    return [capability_symref(*k) for k in symrefs]


COMMAND_DEEPEN = b'deepen'
COMMAND_SHALLOW = b'shallow'
COMMAND_UNSHALLOW = b'unshallow'
COMMAND_DONE = b'done'
COMMAND_WANT = b'want'
COMMAND_HAVE = b'have'


class ProtocolFile(object):
    """A dummy file for network ops that expect file-like objects."""

    def __init__(self, read, write):
        self.read = read
        self.write = write

    def tell(self):
        # Position is not tracked by this dummy; returns None.
        pass

    def close(self):
        # Nothing to release.
        pass


def pkt_line(data):
    """Wrap data in a pkt-line.

    :param data: The data to wrap, as a bytestring or None.
    :return: The data prefixed with its length in pkt-line format; if data was
        None, returns the flush-pkt ('0000').
    """
    if data is None:
        return b'0000'
    # The 4-digit hex length counts the 4 header bytes as well.
    return ('%04x' % (len(data) + 4)).encode('ascii') + data


class Protocol(object):
    """Class for interacting with a remote git process over the wire.

    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
    consists of the length of the line as a 4-byte hex string, followed by the
    payload data. The length includes the 4-byte header. The special line
    '0000' indicates the end of a section of input and is called a 'flush-pkt'.

    For details on the pkt-line format, see the cgit distribution:
        Documentation/technical/protocol-common.txt
    """

    def __init__(self, read, write, close=None, report_activity=None):
        """Initialize the protocol.

        :param read: Callable taking a byte count and returning bytes.
        :param write: Callable taking bytes to send.
        :param close: Optional callable invoked by close().
        :param report_activity: Optional callable invoked as
            ``report_activity(num_bytes, 'read' | 'write')``.
        """
        self.read = read
        self.write = write
        self._close = close
        self.report_activity = report_activity
        # Holds at most one unread pkt-line; see unread_pkt_line.
        self._readahead = None

    def close(self):
        """Invoke the close callback, if one was supplied."""
        if self._close:
            self._close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def read_pkt_line(self):
        """Reads a pkt-line from the remote git process.

        This method may read from the readahead buffer; see unread_pkt_line.

        :return: The next string from the stream, without the length prefix, or
            None for a flush-pkt ('0000').
        :raise HangupException: If nothing could be read for the length
            prefix.
        :raise GitProtocolError: On a socket error, or if fewer payload bytes
            were read than the length prefix announced.
        """
        if self._readahead is None:
            read = self.read
        else:
            # Consume the single buffered pkt-line instead of the wire.
            read = self._readahead.read
            self._readahead = None

        try:
            sizestr = read(4)
            if not sizestr:
                raise HangupException()
            size = int(sizestr, 16)
            if size == 0:
                if self.report_activity:
                    self.report_activity(4, 'read')
                return None
            if self.report_activity:
                self.report_activity(size, 'read')
            pkt_contents = read(size-4)
        except socket.error as e:
            raise GitProtocolError(e)
        else:
            if len(pkt_contents) + 4 != size:
                raise GitProtocolError(
                    'Length of pkt read %04x does not match length prefix %04x'
                    % (len(pkt_contents) + 4, size))
            return pkt_contents

    def eof(self):
        """Test whether the protocol stream has reached EOF.

        Note that this refers to the actual stream EOF and not just a
        flush-pkt.

        :return: True if the stream is at EOF, False otherwise.
        """
        try:
            next_line = self.read_pkt_line()
        except HangupException:
            return True
        # Not at EOF: push the line back so the next read sees it.
        self.unread_pkt_line(next_line)
        return False

    def unread_pkt_line(self, data):
        """Unread a single line of data into the readahead buffer.

        This method can be used to unread a single pkt-line into a fixed
        readahead buffer.

        :param data: The data to unread, without the length prefix.
        :raise ValueError: If more than one pkt-line is unread.
        """
        if self._readahead is not None:
            raise ValueError('Attempted to unread multiple pkt-lines.')
        self._readahead = BytesIO(pkt_line(data))

    def read_pkt_seq(self):
        """Read a sequence of pkt-lines from the remote git process.

        :return: Yields each line of data up to but not including the next
            flush-pkt. Iteration tests truthiness, so an empty payload also
            ends the sequence.
        """
        pkt = self.read_pkt_line()
        while pkt:
            yield pkt
            pkt = self.read_pkt_line()

    def write_pkt_line(self, line):
        """Sends a pkt-line to the remote git process.

        :param line: A string containing the data to send, without the length
            prefix.
        :raise GitProtocolError: If the write raises a socket error.
        """
        try:
            line = pkt_line(line)
            self.write(line)
            if self.report_activity:
                self.report_activity(len(line), 'write')
        except socket.error as e:
            raise GitProtocolError(e)

    def write_file(self):
        """Return a writable file-like object for this protocol."""

        class ProtocolFile(object):
            # Write-only wrapper that counts the bytes passed to write().

            def __init__(self, proto):
                self._proto = proto
                self._offset = 0

            def write(self, data):
                self._proto.write(data)
                self._offset += len(data)

            def tell(self):
                return self._offset

            def close(self):
                pass

        return ProtocolFile(self)

    def write_sideband(self, channel, blob):
        """Write multiplexed data to the sideband.

        :param channel: An int specifying the channel to write to.
        :param blob: A blob of data (as a bytestring) to send on this channel.
        """
        # a pktline can be a max of 65520. a sideband line can therefore be
        # 65520-5 = 65515
        # WTF: Why have the len in ASCII, but the channel in binary.
        while blob:
            self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
            blob = blob[65515:]

    def send_cmd(self, cmd, *args):
        """Send a command and some arguments to a git server.

        Only used for the TCP git protocol (git://).

        :param cmd: The remote service to access.
        :param args: List of arguments to send to remote service.
        """
        # Every argument is NUL-terminated on the wire.
        self.write_pkt_line(cmd + b" " + b"".join([(a + b"\0") for a in args]))

    def read_cmd(self):
        """Read a command and some arguments from the git client

        Only used for the TCP git protocol (git://).

        :return: A tuple of (command, [list of arguments]).
        :raise GitProtocolError: If a flush-pkt is received instead of a
            command.
        """
        line = self.read_pkt_line()
        if line is None:
            # A flush-pkt carries no command; fail with a protocol error
            # rather than an AttributeError on None.
            raise GitProtocolError('Expected command, got flush-pkt')
        splice_at = line.find(b" ")
        cmd, args = line[:splice_at], line[splice_at+1:]
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert args[-1:] == b"\x00"
        return cmd, args[:-1].split(b"\0")


_RBUFSIZE = 8192  # Default read buffer size.


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(self, recv, write, close=None, report_activity=None,
                 rbufsize=_RBUFSIZE):
        """Initialize the protocol.

        :param recv: Callable taking a maximum byte count and returning bytes.
        :param write: Callable taking bytes to send.
        :param close: Optional callable invoked by close().
        :param report_activity: Optional activity callback; see Protocol.
        :param rbufsize: Number of bytes recv() requests from the wire when
            its buffer is empty.
        """
        super(ReceivableProtocol, self).__init__(
            self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size):
        """Read size bytes, looping over recv until satisfied or EOF.

        :param size: Number of bytes to read; must be positive.
        :return: The bytes read; shorter than size only if the underlying
            recv callable returned no data first.
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buf_len = buf.tell() - start
        if buf_len >= size:
            # Already have size bytes in our buffer?  Extract and return.
            buf.seek(start)
            rv = buf.read(size)
            self._rbuf = BytesIO()
            self._rbuf.write(buf.read())
            self._rbuf.seek(0)
            return rv

        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
        while True:
            left = size - buf_len
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(left)
            if not data:
                break
            n = len(data)
            if n == size and not buf_len:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return data
            if n == left:
                buf.write(data)
                del data  # explicit free
                break
            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
            buf.write(data)
            buf_len += n
            del data  # explicit free
            # assert buf_len == buf.tell()
        buf.seek(start)
        return buf.read()

    def recv(self, size):
        """Return at most size bytes, touching the wire only when needed.

        :param size: Maximum number of bytes to return; must be positive.
        :return: Up to size bytes from the read buffer, refilled with a
            single recv call when the buffer was exhausted.
        """
        assert size > 0

        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        buf_len = buf.tell()
        buf.seek(start)

        left = buf_len - start
        if not left:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            buf = BytesIO()
            buf.write(data)
            buf.seek(0)
            del data  # explicit free
            self._rbuf = buf
        return buf.read(size)


def extract_capabilities(text):
    """Extract a capabilities list from a string, if present.

    :param text: String to extract from
    :return: Tuple with text with capabilities removed and list of capabilities
    """
    if b"\0" not in text:
        return text, []
    text, capabilities = text.rstrip().split(b"\0")
    return (text, capabilities.strip().split(b" "))


def extract_want_line_capabilities(text):
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    :param text: Want line to extract from
    :return: Tuple with text with capabilities removed and list of capabilities
    """
    split_text = text.rstrip().split(b" ")
    if len(split_text) < 3:
        return text, []
    return (b" ".join(split_text[:2]), split_text[2:])


def ack_type(capabilities):
    """Extract the ack type from a capabilities list.

    :param capabilities: Collection of capability bytestrings.
    :return: MULTI_ACK_DETAILED, MULTI_ACK or SINGLE_ACK, preferring the most
        detailed mode present.
    """
    if CAPABILITY_MULTI_ACK_DETAILED in capabilities:
        return MULTI_ACK_DETAILED
    elif CAPABILITY_MULTI_ACK in capabilities:
        return MULTI_ACK
    return SINGLE_ACK


class BufferedPktLineWriter(object):
    """Writer that wraps its data in pkt-lines and has an independent buffer.

    Consecutive calls to write() wrap the data in a pkt-line and then buffers
    it until enough lines have been written such that their total length
    (including length prefix) reach the buffer size.
    """

    def __init__(self, write, bufsize=65515):
        """Initialize the BufferedPktLineWriter.

        :param write: A write callback for the underlying writer.
        :param bufsize: The internal buffer size, including length prefixes.
        """
        self._write = write
        self._bufsize = bufsize
        self._wbuf = BytesIO()
        self._buflen = 0

    def write(self, data):
        """Write data, wrapping it in a pkt-line."""
        line = pkt_line(data)
        line_len = len(line)
        over = self._buflen + line_len - self._bufsize
        if over >= 0:
            # Top the buffer up to bufsize, flush it, and keep the remainder
            # of this line for the next buffer.
            start = line_len - over
            self._wbuf.write(line[:start])
            self.flush()
        else:
            start = 0
        saved = line[start:]
        self._wbuf.write(saved)
        self._buflen += len(saved)

    def flush(self):
        """Flush all data from the buffer."""
        data = self._wbuf.getvalue()
        if data:
            self._write(data)
        # Reset the counter write() consults; without this every write after
        # the first fill would flush immediately.
        self._buflen = 0
        self._wbuf = BytesIO()


class PktLineParser(object):
    """Packet line parser that hands completed packets off to a callback.
    """

    def __init__(self, handle_pkt):
        """Initialize the parser.

        :param handle_pkt: Callback invoked with each completed packet's
            payload, or with None for a flush-pkt.
        """
        self.handle_pkt = handle_pkt
        self._readahead = BytesIO()

    def parse(self, data):
        """Parse a fragment of data and call back for any completed packets.
        """
        self._readahead.write(data)
        buf = self._readahead.getvalue()
        if len(buf) < 4:
            # Not even a complete length prefix yet.
            return
        while len(buf) >= 4:
            size = int(buf[:4], 16)
            if size == 0:
                self.handle_pkt(None)
                buf = buf[4:]
            elif size <= len(buf):
                self.handle_pkt(buf[4:size])
                buf = buf[size:]
            else:
                # Packet is incomplete; wait for more data.
                break
        # Keep only the unconsumed tail for the next call.
        self._readahead = BytesIO()
        self._readahead.write(buf)

    def get_tail(self):
        """Read back any unused data."""
        return self._readahead.getvalue()