1// Copyright 2018 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5// Package protowire parses and formats the raw wire encoding. 6// See https://developers.google.com/protocol-buffers/docs/encoding. 7// 8// For marshaling and unmarshaling entire protobuf messages, 9// use the "google.golang.org/protobuf/proto" package instead. 10package protowire 11 12import ( 13 "io" 14 "math" 15 "math/bits" 16 17 "google.golang.org/protobuf/internal/errors" 18) 19 20// Number represents the field number. 21type Number int32 22 23const ( 24 MinValidNumber Number = 1 25 FirstReservedNumber Number = 19000 26 LastReservedNumber Number = 19999 27 MaxValidNumber Number = 1<<29 - 1 28) 29 30// IsValid reports whether the field number is semantically valid. 31// 32// Note that while numbers within the reserved range are semantically invalid, 33// they are syntactically valid in the wire format. 34// Implementations may treat records with reserved field numbers as unknown. 35func (n Number) IsValid() bool { 36 return MinValidNumber <= n && n < FirstReservedNumber || LastReservedNumber < n && n <= MaxValidNumber 37} 38 39// Type represents the wire type. 40type Type int8 41 42const ( 43 VarintType Type = 0 44 Fixed32Type Type = 5 45 Fixed64Type Type = 1 46 BytesType Type = 2 47 StartGroupType Type = 3 48 EndGroupType Type = 4 49) 50 51const ( 52 _ = -iota 53 errCodeTruncated 54 errCodeFieldNumber 55 errCodeOverflow 56 errCodeReserved 57 errCodeEndGroup 58) 59 60var ( 61 errFieldNumber = errors.New("invalid field number") 62 errOverflow = errors.New("variable length integer overflow") 63 errReserved = errors.New("cannot parse reserved wire type") 64 errEndGroup = errors.New("mismatching end group marker") 65 errParse = errors.New("parse error") 66) 67 68// ParseError converts an error code into an error value. 69// This returns nil if n is a non-negative number. 70func ParseError(n int) error { 71 if n >= 0 { 72 return nil 73 } 74 switch n { 75 case errCodeTruncated: 76 return io.ErrUnexpectedEOF 77 case errCodeFieldNumber: 78 return errFieldNumber 79 case errCodeOverflow: 80 return errOverflow 81 case errCodeReserved: 82 return errReserved 83 case errCodeEndGroup: 84 return errEndGroup 85 default: 86 return errParse 87 } 88} 89 90// ConsumeField parses an entire field record (both tag and value) and returns 91// the field number, the wire type, and the total length. 92// This returns a negative length upon an error (see ParseError). 93// 94// The total length includes the tag header and the end group marker (if the 95// field is a group). 96func ConsumeField(b []byte) (Number, Type, int) { 97 num, typ, n := ConsumeTag(b) 98 if n < 0 { 99 return 0, 0, n // forward error code 100 } 101 m := ConsumeFieldValue(num, typ, b[n:]) 102 if m < 0 { 103 return 0, 0, m // forward error code 104 } 105 return num, typ, n + m 106} 107 108// ConsumeFieldValue parses a field value and returns its length. 109// This assumes that the field Number and wire Type have already been parsed. 110// This returns a negative length upon an error (see ParseError). 111// 112// When parsing a group, the length includes the end group marker and 113// the end group is verified to match the starting field number. 114func ConsumeFieldValue(num Number, typ Type, b []byte) (n int) { 115 switch typ { 116 case VarintType: 117 _, n = ConsumeVarint(b) 118 return n 119 case Fixed32Type: 120 _, n = ConsumeFixed32(b) 121 return n 122 case Fixed64Type: 123 _, n = ConsumeFixed64(b) 124 return n 125 case BytesType: 126 _, n = ConsumeBytes(b) 127 return n 128 case StartGroupType: 129 n0 := len(b) 130 for { 131 num2, typ2, n := ConsumeTag(b) 132 if n < 0 { 133 return n // forward error code 134 } 135 b = b[n:] 136 if typ2 == EndGroupType { 137 if num != num2 { 138 return errCodeEndGroup 139 } 140 return n0 - len(b) 141 } 142 143 n = ConsumeFieldValue(num2, typ2, b) 144 if n < 0 { 145 return n // forward error code 146 } 147 b = b[n:] 148 } 149 case EndGroupType: 150 return errCodeEndGroup 151 default: 152 return errCodeReserved 153 } 154} 155 156// AppendTag encodes num and typ as a varint-encoded tag and appends it to b. 157func AppendTag(b []byte, num Number, typ Type) []byte { 158 return AppendVarint(b, EncodeTag(num, typ)) 159} 160 161// ConsumeTag parses b as a varint-encoded tag, reporting its length. 162// This returns a negative length upon an error (see ParseError). 163func ConsumeTag(b []byte) (Number, Type, int) { 164 v, n := ConsumeVarint(b) 165 if n < 0 { 166 return 0, 0, n // forward error code 167 } 168 num, typ := DecodeTag(v) 169 if num < MinValidNumber { 170 return 0, 0, errCodeFieldNumber 171 } 172 return num, typ, n 173} 174 175func SizeTag(num Number) int { 176 return SizeVarint(EncodeTag(num, 0)) // wire type has no effect on size 177} 178 179// AppendVarint appends v to b as a varint-encoded uint64. 180func AppendVarint(b []byte, v uint64) []byte { 181 switch { 182 case v < 1<<7: 183 b = append(b, byte(v)) 184 case v < 1<<14: 185 b = append(b, 186 byte((v>>0)&0x7f|0x80), 187 byte(v>>7)) 188 case v < 1<<21: 189 b = append(b, 190 byte((v>>0)&0x7f|0x80), 191 byte((v>>7)&0x7f|0x80), 192 byte(v>>14)) 193 case v < 1<<28: 194 b = append(b, 195 byte((v>>0)&0x7f|0x80), 196 byte((v>>7)&0x7f|0x80), 197 byte((v>>14)&0x7f|0x80), 198 byte(v>>21)) 199 case v < 1<<35: 200 b = append(b, 201 byte((v>>0)&0x7f|0x80), 202 byte((v>>7)&0x7f|0x80), 203 byte((v>>14)&0x7f|0x80), 204 byte((v>>21)&0x7f|0x80), 205 byte(v>>28)) 206 case v < 1<<42: 207 b = append(b, 208 byte((v>>0)&0x7f|0x80), 209 byte((v>>7)&0x7f|0x80), 210 byte((v>>14)&0x7f|0x80), 211 byte((v>>21)&0x7f|0x80), 212 byte((v>>28)&0x7f|0x80), 213 byte(v>>35)) 214 case v < 1<<49: 215 b = append(b, 216 byte((v>>0)&0x7f|0x80), 217 byte((v>>7)&0x7f|0x80), 218 byte((v>>14)&0x7f|0x80), 219 byte((v>>21)&0x7f|0x80), 220 byte((v>>28)&0x7f|0x80), 221 byte((v>>35)&0x7f|0x80), 222 byte(v>>42)) 223 case v < 1<<56: 224 b = append(b, 225 byte((v>>0)&0x7f|0x80), 226 byte((v>>7)&0x7f|0x80), 227 byte((v>>14)&0x7f|0x80), 228 byte((v>>21)&0x7f|0x80), 229 byte((v>>28)&0x7f|0x80), 230 byte((v>>35)&0x7f|0x80), 231 byte((v>>42)&0x7f|0x80), 232 byte(v>>49)) 233 case v < 1<<63: 234 b = append(b, 235 byte((v>>0)&0x7f|0x80), 236 byte((v>>7)&0x7f|0x80), 237 byte((v>>14)&0x7f|0x80), 238 byte((v>>21)&0x7f|0x80), 239 byte((v>>28)&0x7f|0x80), 240 byte((v>>35)&0x7f|0x80), 241 byte((v>>42)&0x7f|0x80), 242 byte((v>>49)&0x7f|0x80), 243 byte(v>>56)) 244 default: 245 b = append(b, 246 byte((v>>0)&0x7f|0x80), 247 byte((v>>7)&0x7f|0x80), 248 byte((v>>14)&0x7f|0x80), 249 byte((v>>21)&0x7f|0x80), 250 byte((v>>28)&0x7f|0x80), 251 byte((v>>35)&0x7f|0x80), 252 byte((v>>42)&0x7f|0x80), 253 byte((v>>49)&0x7f|0x80), 254 byte((v>>56)&0x7f|0x80), 255 1) 256 } 257 return b 258} 259 260// ConsumeVarint parses b as a varint-encoded uint64, reporting its length. 261// This returns a negative length upon an error (see ParseError). 262func ConsumeVarint(b []byte) (v uint64, n int) { 263 var y uint64 264 if len(b) <= 0 { 265 return 0, errCodeTruncated 266 } 267 v = uint64(b[0]) 268 if v < 0x80 { 269 return v, 1 270 } 271 v -= 0x80 272 273 if len(b) <= 1 { 274 return 0, errCodeTruncated 275 } 276 y = uint64(b[1]) 277 v += y << 7 278 if y < 0x80 { 279 return v, 2 280 } 281 v -= 0x80 << 7 282 283 if len(b) <= 2 { 284 return 0, errCodeTruncated 285 } 286 y = uint64(b[2]) 287 v += y << 14 288 if y < 0x80 { 289 return v, 3 290 } 291 v -= 0x80 << 14 292 293 if len(b) <= 3 { 294 return 0, errCodeTruncated 295 } 296 y = uint64(b[3]) 297 v += y << 21 298 if y < 0x80 { 299 return v, 4 300 } 301 v -= 0x80 << 21 302 303 if len(b) <= 4 { 304 return 0, errCodeTruncated 305 } 306 y = uint64(b[4]) 307 v += y << 28 308 if y < 0x80 { 309 return v, 5 310 } 311 v -= 0x80 << 28 312 313 if len(b) <= 5 { 314 return 0, errCodeTruncated 315 } 316 y = uint64(b[5]) 317 v += y << 35 318 if y < 0x80 { 319 return v, 6 320 } 321 v -= 0x80 << 35 322 323 if len(b) <= 6 { 324 return 0, errCodeTruncated 325 } 326 y = uint64(b[6]) 327 v += y << 42 328 if y < 0x80 { 329 return v, 7 330 } 331 v -= 0x80 << 42 332 333 if len(b) <= 7 { 334 return 0, errCodeTruncated 335 } 336 y = uint64(b[7]) 337 v += y << 49 338 if y < 0x80 { 339 return v, 8 340 } 341 v -= 0x80 << 49 342 343 if len(b) <= 8 { 344 return 0, errCodeTruncated 345 } 346 y = uint64(b[8]) 347 v += y << 56 348 if y < 0x80 { 349 return v, 9 350 } 351 v -= 0x80 << 56 352 353 if len(b) <= 9 { 354 return 0, errCodeTruncated 355 } 356 y = uint64(b[9]) 357 v += y << 63 358 if y < 2 { 359 return v, 10 360 } 361 return 0, errCodeOverflow 362} 363 364// SizeVarint returns the encoded size of a varint. 365// The size is guaranteed to be within 1 and 10, inclusive. 366func SizeVarint(v uint64) int { 367 // This computes 1 + (bits.Len64(v)-1)/7. 368 // 9/64 is a good enough approximation of 1/7 369 return int(9*uint32(bits.Len64(v))+64) / 64 370} 371 372// AppendFixed32 appends v to b as a little-endian uint32. 373func AppendFixed32(b []byte, v uint32) []byte { 374 return append(b, 375 byte(v>>0), 376 byte(v>>8), 377 byte(v>>16), 378 byte(v>>24)) 379} 380 381// ConsumeFixed32 parses b as a little-endian uint32, reporting its length. 382// This returns a negative length upon an error (see ParseError). 383func ConsumeFixed32(b []byte) (v uint32, n int) { 384 if len(b) < 4 { 385 return 0, errCodeTruncated 386 } 387 v = uint32(b[0])<<0 | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 388 return v, 4 389} 390 391// SizeFixed32 returns the encoded size of a fixed32; which is always 4. 392func SizeFixed32() int { 393 return 4 394} 395 396// AppendFixed64 appends v to b as a little-endian uint64. 397func AppendFixed64(b []byte, v uint64) []byte { 398 return append(b, 399 byte(v>>0), 400 byte(v>>8), 401 byte(v>>16), 402 byte(v>>24), 403 byte(v>>32), 404 byte(v>>40), 405 byte(v>>48), 406 byte(v>>56)) 407} 408 409// ConsumeFixed64 parses b as a little-endian uint64, reporting its length. 410// This returns a negative length upon an error (see ParseError). 411func ConsumeFixed64(b []byte) (v uint64, n int) { 412 if len(b) < 8 { 413 return 0, errCodeTruncated 414 } 415 v = uint64(b[0])<<0 | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 416 return v, 8 417} 418 419// SizeFixed64 returns the encoded size of a fixed64; which is always 8. 420func SizeFixed64() int { 421 return 8 422} 423 424// AppendBytes appends v to b as a length-prefixed bytes value. 425func AppendBytes(b []byte, v []byte) []byte { 426 return append(AppendVarint(b, uint64(len(v))), v...) 427} 428 429// ConsumeBytes parses b as a length-prefixed bytes value, reporting its length. 430// This returns a negative length upon an error (see ParseError). 431func ConsumeBytes(b []byte) (v []byte, n int) { 432 m, n := ConsumeVarint(b) 433 if n < 0 { 434 return nil, n // forward error code 435 } 436 if m > uint64(len(b[n:])) { 437 return nil, errCodeTruncated 438 } 439 return b[n:][:m], n + int(m) 440} 441 442// SizeBytes returns the encoded size of a length-prefixed bytes value, 443// given only the length. 444func SizeBytes(n int) int { 445 return SizeVarint(uint64(n)) + n 446} 447 448// AppendString appends v to b as a length-prefixed bytes value. 449func AppendString(b []byte, v string) []byte { 450 return append(AppendVarint(b, uint64(len(v))), v...) 451} 452 453// ConsumeString parses b as a length-prefixed bytes value, reporting its length. 454// This returns a negative length upon an error (see ParseError). 455func ConsumeString(b []byte) (v string, n int) { 456 bb, n := ConsumeBytes(b) 457 return string(bb), n 458} 459 460// AppendGroup appends v to b as group value, with a trailing end group marker. 461// The value v must not contain the end marker. 462func AppendGroup(b []byte, num Number, v []byte) []byte { 463 return AppendVarint(append(b, v...), EncodeTag(num, EndGroupType)) 464} 465 466// ConsumeGroup parses b as a group value until the trailing end group marker, 467// and verifies that the end marker matches the provided num. The value v 468// does not contain the end marker, while the length does contain the end marker. 469// This returns a negative length upon an error (see ParseError). 470func ConsumeGroup(num Number, b []byte) (v []byte, n int) { 471 n = ConsumeFieldValue(num, StartGroupType, b) 472 if n < 0 { 473 return nil, n // forward error code 474 } 475 b = b[:n] 476 477 // Truncate off end group marker, but need to handle denormalized varints. 478 // Assuming end marker is never 0 (which is always the case since 479 // EndGroupType is non-zero), we can truncate all trailing bytes where the 480 // lower 7 bits are all zero (implying that the varint is denormalized). 481 for len(b) > 0 && b[len(b)-1]&0x7f == 0 { 482 b = b[:len(b)-1] 483 } 484 b = b[:len(b)-SizeTag(num)] 485 return b, n 486} 487 488// SizeGroup returns the encoded size of a group, given only the length. 489func SizeGroup(num Number, n int) int { 490 return n + SizeTag(num) 491} 492 493// DecodeTag decodes the field Number and wire Type from its unified form. 494// The Number is -1 if the decoded field number overflows int32. 495// Other than overflow, this does not check for field number validity. 496func DecodeTag(x uint64) (Number, Type) { 497 // NOTE: MessageSet allows for larger field numbers than normal. 498 if x>>3 > uint64(math.MaxInt32) { 499 return -1, 0 500 } 501 return Number(x >> 3), Type(x & 7) 502} 503 504// EncodeTag encodes the field Number and wire Type into its unified form. 505func EncodeTag(num Number, typ Type) uint64 { 506 return uint64(num)<<3 | uint64(typ&7) 507} 508 509// DecodeZigZag decodes a zig-zag-encoded uint64 as an int64. 510// Input: {…, 5, 3, 1, 0, 2, 4, 6, …} 511// Output: {…, -3, -2, -1, 0, +1, +2, +3, …} 512func DecodeZigZag(x uint64) int64 { 513 return int64(x>>1) ^ int64(x)<<63>>63 514} 515 516// EncodeZigZag encodes an int64 as a zig-zag-encoded uint64. 517// Input: {…, -3, -2, -1, 0, +1, +2, +3, …} 518// Output: {…, 5, 3, 1, 0, 2, 4, 6, …} 519func EncodeZigZag(x int64) uint64 { 520 return uint64(x<<1) ^ uint64(x>>63) 521} 522 523// DecodeBool decodes a uint64 as a bool. 524// Input: { 0, 1, 2, …} 525// Output: {false, true, true, …} 526func DecodeBool(x uint64) bool { 527 return x != 0 528} 529 530// EncodeBool encodes a bool as a uint64. 531// Input: {false, true} 532// Output: { 0, 1} 533func EncodeBool(x bool) uint64 { 534 if x { 535 return 1 536 } 537 return 0 538} 539