// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
#define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__

#include <cstdint>
#include <cstring>
#include <string>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream.h>
#include <google/protobuf/arena.h>
#include <google/protobuf/arenastring.h>
#include <google/protobuf/implicit_weak_message.h>
#include <google/protobuf/metadata_lite.h>
#include <google/protobuf/port.h>
#include <google/protobuf/repeated_field.h>
#include <google/protobuf/wire_format_lite.h>
#include <google/protobuf/stubs/strutil.h>

#include <google/protobuf/port_def.inc>


namespace google {
namespace protobuf {

class UnknownFieldSet;
class DescriptorPool;
class MessageFactory;

namespace internal {

// Template code below needs to know about the existence of these functions.
PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint
inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32 num, StringPiece val,
                                 UnknownFieldSet* s);

// The basic abstraction the parser is designed for is a slight modification
// of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
// stream as a series of buffers that concatenate to the full stream.
// Pictorially a ZCIS presents a stream in chunks like so
// [---------------------------------------------------------------]
// [---------------------] chunk 1
//                      [----------------------------] chunk 2
//                                          chunk 3 [--------------]
//
// Here the '-' represent the bytes, which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, that it is still valid to read those bytes. Again,
// pictorially, we now have
//
// [---------------------------------------------------------------]
// [-------------------....] chunk 1
//                    [------------------------....] chunk 2
//                                    chunk 3 [------------------..**]
//                                                      chunk 4 [--****]
// Here '-' means bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk
// has 4 '.' after it. Where these 'overflow' bytes represent bytes past the
// stream, indicated by '*' above, their values are unspecified. It is still
// legal to read them (i.e. doing so should not segfault). Reading past the
// end should be detected by the user and indicated as an error.
//
// The reason for this, admittedly, unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important
// ways. Firstly, it alleviates having to perform bounds checks if a piece of
// code is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wire format is such that reading a key/value
// pair is always less than 16 bytes. This removes the need to switch to the
// next buffer in the middle of reading primitive values. Hence there is no
// need to store and load the current position.
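//
// As a minimal sketch of what this invariant buys (a hypothetical helper,
// not part of this header): a value that is at most kSlopBytes long can be
// read without any bounds check, deferring seam handling to the single
// Done() check of the parse loop:
//
//   // Safe even when ptr is within 8 bytes of buffer_end_: the slop region
//   // guarantees the full 8 bytes are readable.
//   inline const char* ParseFixed64(const char* ptr, uint64* out) {
//     *out = UnalignedLoad<uint64>(ptr);  // defined later in this header
//     return ptr + 8;
//   }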

class PROTOBUF_EXPORT EpsCopyInputStream {
 public:
  enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };

  explicit EpsCopyInputStream(bool enable_aliasing)
      : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}

  void BackUp(const char* ptr) {
    GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
    int count;
    if (next_chunk_ == buffer_) {
      count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
    } else {
      count = size_ + static_cast<int>(buffer_end_ - ptr);
    }
    if (count > 0) StreamBackUp(count);
  }

  // If the return value is negative, it's an error.
  PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
    GOOGLE_DCHECK(limit >= 0 && limit <= INT_MAX - kSlopBytes);
    // This add is safe due to the invariant above, because
    // ptr - buffer_end_ <= kSlopBytes.
    limit += static_cast<int>(ptr - buffer_end_);
    limit_end_ = buffer_end_ + (std::min)(0, limit);
    auto old_limit = limit_;
    limit_ = limit;
    return old_limit - limit;
  }

  PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
    if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
    limit_ = limit_ + delta;
    // TODO(gerbens) We could remove this line and hoist the code to
    // DoneFallback. Study the perf/bin-size effects.
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return true;
  }
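
  // A sketch of how PushLimit/PopLimit pair up around a length-delimited
  // field (mirroring the ReadSizeAndPushLimitAndDepth equivalence documented
  // in ParseContext below; ParsePayload is illustrative, not real API):
  //
  //   int size = ReadSize(&ptr);       // length prefix of the field
  //   if (!ptr) return nullptr;
  //   int old = PushLimit(ptr, size);  // delta needed to restore afterwards
  //   ptr = ParsePayload(ptr);         // must stop at the pushed limit
  //   if (!PopLimit(old)) return nullptr;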

  PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      return ptr + size;
    }
    return SkipFallback(ptr, size);
  }
  PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
                                                  std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->assign(ptr, size);
      return ptr + size;
    }
    return ReadStringFallback(ptr, size, s);
  }
  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
                                                    std::string* s) {
    if (size <= buffer_end_ + kSlopBytes - ptr) {
      s->append(ptr, size);
      return ptr + size;
    }
    return AppendStringFallback(ptr, size, s);
  }
  // Implemented in arenastring.cc
  PROTOBUF_MUST_USE_RESULT const char* ReadArenaString(const char* ptr,
                                                       ArenaStringPtr* s,
                                                       Arena* arena);

  template <typename Tag, typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr,
                                                         Tag expected_tag,
                                                         RepeatedField<T>* out);

  template <typename T>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
                                                       int size,
                                                       RepeatedField<T>* out);
  template <typename Add>
  PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
                                                        Add add);

  uint32 LastTag() const { return last_tag_minus_1_ + 1; }
  bool ConsumeEndGroup(uint32 start_tag) {
    bool res = last_tag_minus_1_ == start_tag;
    last_tag_minus_1_ = 0;
    return res;
  }
  bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
  bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
  void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; }
  void SetEndOfStream() { last_tag_minus_1_ = 1; }
  bool IsExceedingLimit(const char* ptr) {
    return ptr > limit_end_ &&
           (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
  }
  int BytesUntilLimit(const char* ptr) const {
    return limit_ + static_cast<int>(buffer_end_ - ptr);
  }
  // Returns true if more data is available; if false is returned one has to
  // call Done for further checks.
  bool DataAvailable(const char* ptr) { return ptr < limit_end_; }

 protected:
  // Returns true if a limit (either an explicit limit or the end of stream)
  // is reached. It aligns *ptr across buffer seams.
  // If the limit is exceeded, it returns true and *ptr is set to null.
  bool DoneWithCheck(const char** ptr, int d) {
    GOOGLE_DCHECK(*ptr);
    if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
    int overrun = static_cast<int>(*ptr - buffer_end_);
    GOOGLE_DCHECK_LE(overrun, kSlopBytes);  // Guaranteed by parse loop.
    if (overrun == limit_) {  // No need to flip buffers if we ended on a limit.
      // If we actually overran the buffer and next_chunk_ is null, the
      // stream ended and we passed the stream end.
      if (overrun > 0 && next_chunk_ == nullptr) *ptr = nullptr;
      return true;
    }
    auto res = DoneFallback(overrun, d);
    *ptr = res.first;
    return res.second;
  }

  const char* InitFrom(StringPiece flat) {
    overall_limit_ = 0;
    if (flat.size() > kSlopBytes) {
      limit_ = kSlopBytes;
      limit_end_ = buffer_end_ = flat.data() + flat.size() - kSlopBytes;
      next_chunk_ = buffer_;
      if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
      return flat.data();
    } else {
      std::memcpy(buffer_, flat.data(), flat.size());
      limit_ = 0;
      limit_end_ = buffer_end_ = buffer_ + flat.size();
      next_chunk_ = nullptr;
      if (aliasing_ == kOnPatch) {
        aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
                    reinterpret_cast<std::uintptr_t>(buffer_);
      }
      return buffer_;
    }
  }

  const char* InitFrom(io::ZeroCopyInputStream* zcis);

  const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
    if (limit == -1) return InitFrom(zcis);
    overall_limit_ = limit;
    auto res = InitFrom(zcis);
    limit_ = limit - static_cast<int>(buffer_end_ - res);
    limit_end_ = buffer_end_ + (std::min)(0, limit_);
    return res;
  }

 private:
  const char* limit_end_;  // buffer_end_ + min(limit_, 0)
  const char* buffer_end_;
  const char* next_chunk_;
  int size_;
  int limit_;  // relative to buffer_end_;
  io::ZeroCopyInputStream* zcis_ = nullptr;
  char buffer_[2 * kSlopBytes] = {};
  enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
  std::uintptr_t aliasing_ = kNoAliasing;
  // This variable is used to communicate how the parse ended, in order to
  // completely verify the parsed data. A wire-format parse can end because of
  // one of the following conditions:
  // 1) A parse can end on a pushed limit.
  // 2) A parse can end on End Of Stream (EOS).
  // 3) A parse can end on a 0 tag (only valid for a toplevel message).
  // 4) A parse can end on an end-group tag.
  // This variable should always be set to 0, which indicates case 1. If the
  // parse terminated due to EOS (case 2), it's set to 1. In case the parse
  // ended due to a terminating tag (cases 3 and 4), it's set to (tag - 1).
  // This variable doesn't really belong in EpsCopyInputStream and should be
  // part of the ParseContext, but case 2 is most easily and optimally
  // implemented in DoneFallback.
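  //
  // A worked example of the (tag - 1) trick: for a group field with number N,
  // the start tag is (N << 3) | 3 and the end tag is (N << 3) | 4, i.e.
  // end tag == start tag + 1. After SetLastTag(end_tag) this variable holds
  // exactly the start tag, which is what ConsumeEndGroup(start_tag) compares
  // against.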
  uint32 last_tag_minus_1_ = 0;
  int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
  // Pretty random large number that seems like a safe allocation on most
  // systems. TODO(gerbens) do we need to set this as a build flag?
  enum { kSafeStringSize = 50000000 };

  // Advances to the next buffer chunk and returns a pointer to the same
  // logical place in the stream as indicated by overrun. Overrun indicates
  // the position in the slop region the parse was left at
  // (0 <= overrun <= kSlopBytes). The returned bool is true if at a limit, at
  // which point the returned pointer may be null if there was an error. The
  // invariant of this function is that it's guaranteed that kSlopBytes bytes
  // can be accessed from the returned ptr. This function might advance more
  // than one buffer in the underlying ZeroCopyInputStream.
  std::pair<const char*, bool> DoneFallback(int overrun, int depth);
  // Advances to the next buffer; at most one call to Next() on the underlying
  // ZeroCopyInputStream is made. This function DOES NOT match the returned
  // pointer to where in the slop region the parse ends, hence no overrun
  // parameter. This is useful for string operations where you always copy
  // to the end of the buffer (including the slop region).
  const char* Next();
  // overrun is the location in the slop region the stream currently is at
  // (0 <= overrun <= kSlopBytes); it is used to prevent flipping to the next
  // buffer of the ZeroCopyInputStream in the case the parse will end in the
  // last kSlopBytes of the current buffer. depth is the current depth of
  // nested groups (or negative if the use case does not need careful
  // tracking).
  inline const char* NextBuffer(int overrun, int depth);
  const char* SkipFallback(const char* ptr, int size);
  const char* AppendStringFallback(const char* ptr, int size, std::string* str);
  const char* ReadStringFallback(const char* ptr, int size, std::string* str);
  bool StreamNext(const void** data) {
    bool res = zcis_->Next(data, &size_);
    if (res) overall_limit_ -= size_;
    return res;
  }
  void StreamBackUp(int count) {
    zcis_->BackUp(count);
    overall_limit_ += count;
  }

  template <typename A>
  const char* AppendSize(const char* ptr, int size, const A& append) {
    int chunk_size = buffer_end_ + kSlopBytes - ptr;
    do {
      GOOGLE_DCHECK(size > chunk_size);
      if (next_chunk_ == nullptr) return nullptr;
      append(ptr, chunk_size);
      ptr += chunk_size;
      size -= chunk_size;
      // TODO(gerbens) Next calls NextBuffer, which generates buffers with
      // overlap and thus incurs the cost of copying the slop regions. This is
      // not necessary for reading strings; we should just fetch the next
      // buffers directly.
      if (limit_ <= kSlopBytes) return nullptr;
      ptr = Next();
      if (ptr == nullptr) return nullptr;  // passed the limit
      ptr += kSlopBytes;
      chunk_size = buffer_end_ + kSlopBytes - ptr;
    } while (size > chunk_size);
    append(ptr, size);
    return ptr + size;
  }

  // AppendUntilEnd appends data until a limit (either a pushed limit or the
  // end of stream). Normal payloads are from length-delimited fields, which
  // have an explicit size. Reading until a limit only comes up when the
  // string takes the place of a protobuf, i.e. RawMessage/StringRawMessage,
  // lazy fields and implicit weak messages. We keep these methods private
  // and friend them.
  template <typename A>
  const char* AppendUntilEnd(const char* ptr, const A& append) {
    if (ptr - buffer_end_ > limit_) return nullptr;
    while (limit_ > kSlopBytes) {
      size_t chunk_size = buffer_end_ + kSlopBytes - ptr;
      append(ptr, chunk_size);
      ptr = Next();
      if (ptr == nullptr) return limit_end_;
      ptr += kSlopBytes;
    }
    auto end = buffer_end_ + limit_;
    GOOGLE_DCHECK(end >= ptr);
    append(ptr, end - ptr);
    return end;
  }

  PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
                                                    std::string* str) {
    return AppendUntilEnd(
        ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
  }
  friend class ImplicitWeakMessage;
};

// ParseContext holds all data that is global to the entire parse. Most
// importantly it contains the input stream, but it also stores the recursion
// depth and the end-group tag, in case a parser ended on an end-group, to
// verify matching start/end-group tags.
class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
 public:
  struct Data {
    const DescriptorPool* pool = nullptr;
    MessageFactory* factory = nullptr;
    Arena* arena = nullptr;
  };

  template <typename... T>
  ParseContext(int depth, bool aliasing, const char** start, T&&... args)
      : EpsCopyInputStream(aliasing), depth_(depth) {
    *start = InitFrom(std::forward<T>(args)...);
  }

  void TrackCorrectEnding() { group_depth_ = 0; }

  bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }

  int depth() const { return depth_; }

  Data& data() { return data_; }
  const Data& data() const { return data_; }

  template <typename T>
  PROTOBUF_MUST_USE_RESULT const char* ParseMessage(T* msg, const char* ptr);
  // These overloads are outlined because the generic types go through a
  // virtual call.
  const char* ParseMessage(MessageLite* msg, const char* ptr);
  const char* ParseMessage(Message* msg, const char* ptr);

  template <typename T>
  PROTOBUF_MUST_USE_RESULT PROTOBUF_NDEBUG_INLINE const char* ParseGroup(
      T* msg, const char* ptr, uint32 tag) {
    if (--depth_ < 0) return nullptr;
    group_depth_++;
    ptr = msg->_InternalParse(ptr, this);
    group_depth_--;
    depth_++;
    if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
    return ptr;
  }

 private:
  // Out-of-line routine to save space in ParseContext::ParseMessage<T>:
  //   int old;
  //   ptr = ReadSizeAndPushLimitAndDepth(ptr, &old)
  // is equivalent to:
  //   int size = ReadSize(&ptr);
  //   if (!ptr) return nullptr;
  //   int old = PushLimit(ptr, size);
  //   if (--depth_ < 0) return nullptr;
  PROTOBUF_MUST_USE_RESULT const char* ReadSizeAndPushLimitAndDepth(
      const char* ptr, int* old_limit);

  // The context keeps an internal stack to keep track of the recursive
  // part of the parse state.
  // Current depth of the active parser; depth counts down.
  // This is used to limit recursion depth (to prevent overflow on malicious
  // data), but is also used to index into stack_ to store the current state.
  int depth_;
  // Unfortunately necessary for the fringe case of ending on a 0 or end-group
  // tag in the last kSlopBytes of a ZeroCopyInputStream chunk.
  int group_depth_ = INT_MIN;
  Data data_;
};

template <uint32 tag>
bool ExpectTag(const char* ptr) {
  if (tag < 128) {
    return *ptr == static_cast<char>(tag);
  } else {
    static_assert(tag < 128 * 128, "We only expect tags for 1 or 2 bytes");
    char buf[2] = {static_cast<char>(tag | 0x80), static_cast<char>(tag >> 7)};
    return std::memcmp(ptr, buf, 2) == 0;
  }
}
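
// For example: the tag for field number 1 with wire type varint is
// (1 << 3) | 0 == 0x08, a single byte, so ExpectTag<8>(ptr) tests for it.
// A two-byte case: field 100 with wire type varint has tag
// (100 << 3) | 0 == 800, which encodes as {0xA0, 0x06}; indeed
// static_cast<char>(800 | 0x80) is 0xA0 and 800 >> 7 == 6, matching the buf
// computed above.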

template <int>
struct EndianHelper;

template <>
struct EndianHelper<1> {
  static uint8 Load(const void* p) { return *static_cast<const uint8*>(p); }
};

template <>
struct EndianHelper<2> {
  static uint16 Load(const void* p) {
    uint16 tmp;
    std::memcpy(&tmp, p, 2);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_16(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<4> {
  static uint32 Load(const void* p) {
    uint32 tmp;
    std::memcpy(&tmp, p, 4);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_32(tmp);
#endif
    return tmp;
  }
};

template <>
struct EndianHelper<8> {
  static uint64 Load(const void* p) {
    uint64 tmp;
    std::memcpy(&tmp, p, 8);
#ifndef PROTOBUF_LITTLE_ENDIAN
    tmp = bswap_64(tmp);
#endif
    return tmp;
  }
};

template <typename T>
T UnalignedLoad(const char* p) {
  auto tmp = EndianHelper<sizeof(T)>::Load(p);
  T res;
  memcpy(&res, &tmp, sizeof(T));
  return res;
}

PROTOBUF_EXPORT
std::pair<const char*, uint32> VarintParseSlow32(const char* p, uint32 res);
PROTOBUF_EXPORT
std::pair<const char*, uint64> VarintParseSlow64(const char* p, uint32 res);

inline const char* VarintParseSlow(const char* p, uint32 res, uint32* out) {
  auto tmp = VarintParseSlow32(p, res);
  *out = tmp.second;
  return tmp.first;
}

inline const char* VarintParseSlow(const char* p, uint32 res, uint64* out) {
  auto tmp = VarintParseSlow64(p, res);
  *out = tmp.second;
  return tmp.first;
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
  auto ptr = reinterpret_cast<const uint8*>(p);
  uint32 res = ptr[0];
  if (!(res & 0x80)) {
    *out = res;
    return p + 1;
  }
  uint32 byte = ptr[1];
  res += (byte - 1) << 7;
  if (!(byte & 0x80)) {
    *out = res;
    return p + 2;
  }
  return VarintParseSlow(p, res, out);
}
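
// For example: the bytes {0xAC, 0x02} (the varint encoding of 300) parse as
// res = 0xAC == 172 from the first byte, then res += (2 - 1) << 7 == 128,
// where the "- 1" cancels the continuation bit already counted in 172,
// giving 300.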

// Used for tags; could read up to 5 bytes, which must be available.
// The caller must ensure it's safe to call.

PROTOBUF_EXPORT
std::pair<const char*, uint32> ReadTagFallback(const char* p, uint32 res);

// Same as VarintParse but accepts only 5 bytes at most.
inline const char* ReadTag(const char* p, uint32* out, uint32 /*max_tag*/ = 0) {
  uint32 res = static_cast<uint8>(p[0]);
  if (res < 128) {
    *out = res;
    return p + 1;
  }
  uint32 second = static_cast<uint8>(p[1]);
  res += (second - 1) << 7;
  if (second < 128) {
    *out = res;
    return p + 2;
  }
  auto tmp = ReadTagFallback(p, res);
  *out = tmp.second;
  return tmp.first;
}

// Decodes 2 consecutive bytes of a varint and returns the value, shifted left
// by 1. It simultaneously updates *ptr to *ptr + 1 or *ptr + 2, depending on
// whether the first byte's continuation bit is set.
// If bit 15 of the return value is set (equivalent to the continuation bits
// of both bytes being set) the varint continues, otherwise the parse is done.
// On x86 this compiles to:
// movsx eax, dil
// add edi, eax
// adc [rsi], 1
// add eax, eax
// and eax, edi
inline uint32 DecodeTwoBytes(const char** ptr) {
  uint32 value = UnalignedLoad<uint16>(*ptr);
  // Sign extend the low byte continuation bit
  uint32_t x = static_cast<int8_t>(value);
  // This add is an amazing operation, it cancels the low byte continuation
  // bit from value, transferring it to the carry. Simultaneously it also
  // shifts the 7 LSB left by one, tightly against the high byte varint bits.
  // Hence value now contains the unpacked value shifted left by 1.
  value += x;
  // Use the carry to update the ptr appropriately.
  *ptr += value < x ? 2 : 1;
  return value & (x + x);  // Mask out the high byte iff no continuation
}
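
// A worked example: for the bytes {0x96, 0x01} (the varint encoding of 150),
// value == 0x0196 and x == 0xFFFFFF96 (sign-extended 0x96). value += x wraps
// to 0x12C and sets the carry (value < x), so *ptr advances by 2. Since the
// low byte's continuation bit was set, the mask (x + x) == 0xFFFFFF2C keeps
// the high byte's bits, and the function returns 0x12C == 300 == 150 << 1;
// bit 15 is clear, so the caller knows the varint is complete.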

// More efficient varint parsing for big varints
inline const char* ParseBigVarint(const char* p, uint64* out) {
  auto pnew = p;
  auto tmp = DecodeTwoBytes(&pnew);
  uint64 res = tmp >> 1;
  if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
    *out = res;
    return pnew;
  }
  for (std::uint32_t i = 1; i < 5; i++) {
    pnew = p + 2 * i;
    tmp = DecodeTwoBytes(&pnew);
    res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * i - 1);
    if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
      *out = res;
      return pnew;
    }
  }
  return nullptr;
}

PROTOBUF_EXPORT
std::pair<const char*, int32> ReadSizeFallback(const char* p, uint32 first);
// Used for sizes of length-delimited fields; could read up to 5 bytes, which
// must be available. Additionally it makes sure the unsigned value fits in an
// int32; otherwise *pp is set to nullptr. The caller must ensure it's safe to
// call.
inline uint32 ReadSize(const char** pp) {
  auto p = *pp;
  uint32 res = static_cast<uint8>(p[0]);
  if (res < 128) {
    *pp = p + 1;
    return res;
  }
  auto x = ReadSizeFallback(p, res);
  *pp = x.first;
  return x.second;
}

// Some convenience functions to simplify the generated parse loop code.
// Returning the value and updating the buffer pointer allows for nicer
// function composition. We rely on the compiler to inline these.
// Also, in debug compiles, having locally scoped variables tends to generate
// stack frames that scale as O(num fields).
inline uint64 ReadVarint64(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline uint32 ReadVarint32(const char** p) {
  uint32 tmp;
  *p = VarintParse(*p, &tmp);
  return tmp;
}

inline int64 ReadVarintZigZag64(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode64(tmp);
}

inline int32 ReadVarintZigZag32(const char** p) {
  uint64 tmp;
  *p = VarintParse(*p, &tmp);
  return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
}
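
// For example: ZigZag encoding maps signed values to unsigned ones so that
// small negative numbers stay small on the wire (0 -> 0, -1 -> 1, 1 -> 2,
// -2 -> 3, ...). A sint32 field holding -1 is thus the single payload byte
// 0x01, which ReadVarintZigZag32 decodes back to -1.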

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
    T* msg, const char* ptr) {
  int old;
  ptr = ReadSizeAndPushLimitAndDepth(ptr, &old);
  ptr = ptr ? msg->_InternalParse(ptr, this) : nullptr;
  depth_++;
  if (!PopLimit(old)) return nullptr;
  return ptr;
}

template <typename Tag, typename T>
const char* EpsCopyInputStream::ReadRepeatedFixed(const char* ptr,
                                                  Tag expected_tag,
                                                  RepeatedField<T>* out) {
  do {
    out->Add(UnalignedLoad<T>(ptr));
    ptr += sizeof(T);
    if (PROTOBUF_PREDICT_FALSE(ptr >= limit_end_)) return ptr;
  } while (UnalignedLoad<Tag>(ptr) == expected_tag && (ptr += sizeof(Tag)));
  return ptr;
}

template <typename T>
const char* EpsCopyInputStream::ReadPackedFixed(const char* ptr, int size,
                                                RepeatedField<T>* out) {
  int nbytes = buffer_end_ + kSlopBytes - ptr;
  while (size > nbytes) {
    int num = nbytes / sizeof(T);
    int old_entries = out->size();
    out->Reserve(old_entries + num);
    int block_size = num * sizeof(T);
    auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
    std::memcpy(dst, ptr, block_size);
#else
    for (int i = 0; i < num; i++)
      dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
    size -= block_size;
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += kSlopBytes - (nbytes - block_size);
    nbytes = buffer_end_ + kSlopBytes - ptr;
  }
  int num = size / sizeof(T);
  int old_entries = out->size();
  out->Reserve(old_entries + num);
  int block_size = num * sizeof(T);
  auto dst = out->AddNAlreadyReserved(num);
#ifdef PROTOBUF_LITTLE_ENDIAN
  std::memcpy(dst, ptr, block_size);
#else
  for (int i = 0; i < num; i++) dst[i] = UnalignedLoad<T>(ptr + i * sizeof(T));
#endif
  ptr += block_size;
  if (size != block_size) return nullptr;
  return ptr;
}

template <typename Add>
const char* ReadPackedVarintArray(const char* ptr, const char* end, Add add) {
  while (ptr < end) {
    uint64 varint;
    ptr = VarintParse(ptr, &varint);
    if (ptr == nullptr) return nullptr;
    add(varint);
  }
  return ptr;
}

template <typename Add>
const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
  int size = ReadSize(&ptr);
  if (ptr == nullptr) return nullptr;
  int chunk_size = buffer_end_ - ptr;
  while (size > chunk_size) {
    ptr = ReadPackedVarintArray(ptr, buffer_end_, add);
    if (ptr == nullptr) return nullptr;
    int overrun = ptr - buffer_end_;
    GOOGLE_DCHECK(overrun >= 0 && overrun <= kSlopBytes);
    if (size - chunk_size <= kSlopBytes) {
      // The current buffer contains all the information needed; we don't need
      // to flip buffers. However we must parse from a buffer with enough
      // space so we are not prone to a buffer overflow.
      char buf[kSlopBytes + 10] = {};
      std::memcpy(buf, buffer_end_, kSlopBytes);
      GOOGLE_CHECK_LE(size - chunk_size, kSlopBytes);
      auto end = buf + (size - chunk_size);
      auto res = ReadPackedVarintArray(buf + overrun, end, add);
      if (res == nullptr || res != end) return nullptr;
      return buffer_end_ + (res - buf);
    }
    size -= overrun + chunk_size;
    GOOGLE_DCHECK_GT(size, 0);
    // We must flip buffers.
    if (limit_ <= kSlopBytes) return nullptr;
    ptr = Next();
    if (ptr == nullptr) return nullptr;
    ptr += overrun;
    chunk_size = buffer_end_ - ptr;
  }
  auto end = ptr + size;
  ptr = ReadPackedVarintArray(ptr, end, add);
  return end == ptr ? ptr : nullptr;
}

// Helpers for verification of UTF-8.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);

inline bool VerifyUTF8(const std::string* s, const char* field_name) {
  return VerifyUTF8(*s, field_name);
}

// All the string parsers with or without UTF-8 checking and for all CTypes.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);


// Uncomment any of the following lines to debug which parse function is
// failing.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret)  \
  if (!(predicate)) {                                  \
    /*  ::raise(SIGINT);  */                           \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */      \
    return ret;                                        \
  }

#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
                                                 const char* ptr,
                                                 ParseContext* ctx) {
  uint32 number = tag >> 3;
  GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
  using WireType = internal::WireFormatLite::WireType;
  switch (tag & 7) {
    case WireType::WIRETYPE_VARINT: {
      uint64 value;
      ptr = VarintParse(ptr, &value);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      field_parser.AddVarint(number, value);
      break;
    }
    case WireType::WIRETYPE_FIXED64: {
      uint64 value = UnalignedLoad<uint64>(ptr);
      ptr += 8;
      field_parser.AddFixed64(number, value);
      break;
    }
    case WireType::WIRETYPE_LENGTH_DELIMITED: {
      ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_START_GROUP: {
      ptr = field_parser.ParseGroup(number, ptr, ctx);
      GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
      break;
    }
    case WireType::WIRETYPE_END_GROUP: {
      GOOGLE_LOG(FATAL) << "Can't happen";
      break;
    }
    case WireType::WIRETYPE_FIXED32: {
      uint32 value = UnalignedLoad<uint32>(ptr);
      ptr += 4;
      field_parser.AddFixed32(number, value);
      break;
    }
    default:
      return nullptr;
  }
  return ptr;
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
                                                      const char* ptr,
                                                      ParseContext* ctx) {
  while (!ctx->Done(&ptr)) {
    uint32 tag;
    ptr = ReadTag(ptr, &tag);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
    if (tag == 0 || (tag & 7) == 4) {
      ctx->SetLastTag(tag);
      return ptr;
    }
    ptr = FieldParser(tag, field_parser, ptr, ctx);
    GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
  }
  return ptr;
}
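
// A sketch of the interface a field_parser passed to WireFormatParser must
// provide, as implied by the calls in FieldParser above (illustrative; not a
// type defined in this header):
//
//   struct MyFieldParser {
//     void AddVarint(uint32 number, uint64 value);
//     void AddFixed64(uint32 number, uint64 value);
//     const char* ParseLengthDelimited(uint32 number, const char* ptr,
//                                      ParseContext* ctx);
//     const char* ParseGroup(uint32 number, const char* ptr,
//                            ParseContext* ctx);
//     void AddFixed32(uint32 number, uint32 value);
//   };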

// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.

// These are packed varints
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, metadata, field_num](uint64 val) {
        if (is_valid(val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

template <typename T>
PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
    void* object, const char* ptr, ParseContext* ctx,
    bool (*is_valid)(const void*, int), const void* data,
    InternalMetadata* metadata, int field_num) {
  return ctx->ReadPackedVarint(
      ptr, [object, is_valid, data, metadata, field_num](uint64 val) {
        if (is_valid(data, val)) {
          static_cast<RepeatedField<int>*>(object)->Add(val);
        } else {
          WriteVarint(field_num, val, metadata->mutable_unknown_fields<T>());
        }
      });
}

PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);

// This is the only recursive parser.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse, but it is actually also useful
// in the generated code. It uses overloading on std::string* vs
// UnknownFieldSet* to make the generated code isomorphic between full and
// lite.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);

}  // namespace internal
}  // namespace protobuf
}  // namespace google

#include <google/protobuf/port_undef.inc>

#endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__