1 // Protocol Buffers - Google's data interchange format
2 // Copyright 2008 Google Inc.  All rights reserved.
3 // https://developers.google.com/protocol-buffers/
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 //     * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
31 #ifndef GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
32 #define GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
33 
34 #include <cstdint>
35 #include <cstring>
36 #include <string>
37 
38 #include <google/protobuf/io/coded_stream.h>
39 #include <google/protobuf/io/zero_copy_stream.h>
40 #include <google/protobuf/arenastring.h>
41 #include <google/protobuf/implicit_weak_message.h>
42 #include <google/protobuf/metadata_lite.h>
43 #include <google/protobuf/port.h>
44 #include <google/protobuf/repeated_field.h>
45 #include <google/protobuf/wire_format_lite.h>
46 #include <google/protobuf/stubs/strutil.h>
47 
48 #include <google/protobuf/port_def.inc>
49 
50 
51 namespace google {
52 namespace protobuf {
53 
54 class UnknownFieldSet;
55 class DescriptorPool;
56 class MessageFactory;
57 
58 namespace internal {
59 
// Template code below needs to know about the existence of these functions.
// Two overload sets are declared, distinguished only by the type of the sink
// `s`: std::string* (exported, defined out of line) and UnknownFieldSet*
// (declared inline; the definitions are not part of this header section).
// NOTE(review): `num` is presumably the field number and `val` the payload;
// confirm against the definitions.
PROTOBUF_EXPORT void WriteVarint(uint32 num, uint64 val, std::string* s);
PROTOBUF_EXPORT void WriteLengthDelimited(uint32 num, StringPiece val,
                                          std::string* s);
// Inline because it is just forwarding to s->WriteVarint
inline void WriteVarint(uint32 num, uint64 val, UnknownFieldSet* s);
inline void WriteLengthDelimited(uint32 num, StringPiece val,
                                 UnknownFieldSet* s);
68 
69 
70 // The basic abstraction the parser is designed for is a slight modification
71 // of the ZeroCopyInputStream (ZCIS) abstraction. A ZCIS presents a serialized
72 // stream as a series of buffers that concatenate to the full stream.
73 // Pictorially a ZCIS presents a stream in chunks like so
74 // [---------------------------------------------------------------]
75 // [---------------------] chunk 1
76 //                      [----------------------------] chunk 2
77 //                                          chunk 3 [--------------]
78 //
// Where the '-' represent the bytes which are vertically lined up with the
// bytes of the stream. The proto parser requires its input to be presented
// similarly, with the extra property that each chunk has kSlopBytes past its
// end that overlap with the first kSlopBytes of the next chunk, or, if there
// is no next chunk, that it is at least still valid to read those bytes.
// Again, pictorially, we now have
85 //
86 // [---------------------------------------------------------------]
87 // [-------------------....] chunk 1
88 //                    [------------------------....] chunk 2
89 //                                    chunk 3 [------------------..**]
90 //                                                      chunk 4 [--****]
// Here '-' means the bytes of the stream or chunk and '.' means bytes past the
// chunk that match up with the start of the next chunk. Above, each chunk has
// 4 '.' after the chunk. In case these 'overflow' bytes represent bytes
// past the stream, indicated by '*' above, their values are unspecified. It is
// still legal to read them (i.e. doing so should not segfault). Reading past
// the end should be detected by the user and indicated as an error.
97 //
// The reason for this admittedly unconventional invariant is to ruthlessly
// optimize the protobuf parser. Having an overlap helps in two important ways.
// Firstly, it removes the need to perform bounds checks if a piece of code
// is guaranteed to not read more than kSlopBytes. Secondly, and more
// importantly, the protobuf wire format is such that reading a key/value pair
// always takes less than 16 bytes. This removes the need to change to the next
// buffer in the middle of reading primitive values. Hence there is no need to
// store and load the current position.
106 
107 class PROTOBUF_EXPORT EpsCopyInputStream {
108  public:
109   enum { kSlopBytes = 16, kMaxCordBytesToCopy = 512 };
110 
EpsCopyInputStream(bool enable_aliasing)111   explicit EpsCopyInputStream(bool enable_aliasing)
112       : aliasing_(enable_aliasing ? kOnPatch : kNoAliasing) {}
113 
BackUp(const char * ptr)114   void BackUp(const char* ptr) {
115     GOOGLE_DCHECK(ptr <= buffer_end_ + kSlopBytes);
116     int count;
117     if (next_chunk_ == buffer_) {
118       count = static_cast<int>(buffer_end_ + kSlopBytes - ptr);
119     } else {
120       count = size_ + static_cast<int>(buffer_end_ - ptr);
121     }
122     if (count > 0) zcis_->BackUp(count);
123   }
124 
125   // If return value is negative it's an error
PushLimit(const char * ptr,int limit)126   PROTOBUF_MUST_USE_RESULT int PushLimit(const char* ptr, int limit) {
127     GOOGLE_DCHECK(limit >= 0);
128     limit += static_cast<int>(ptr - buffer_end_);
129     limit_end_ = buffer_end_ + (std::min)(0, limit);
130     auto old_limit = limit_;
131     limit_ = limit;
132     return old_limit - limit;
133   }
134 
PopLimit(int delta)135   PROTOBUF_MUST_USE_RESULT bool PopLimit(int delta) {
136     if (PROTOBUF_PREDICT_FALSE(!EndedAtLimit())) return false;
137     limit_ = limit_ + delta;
138     // TODO(gerbens) We could remove this line and hoist the code to
139     // DoneFallback. Study the perf/bin-size effects.
140     limit_end_ = buffer_end_ + (std::min)(0, limit_);
141     return true;
142   }
143 
Skip(const char * ptr,int size)144   PROTOBUF_MUST_USE_RESULT const char* Skip(const char* ptr, int size) {
145     if (size <= buffer_end_ + kSlopBytes - ptr) {
146       return ptr + size;
147     }
148     return SkipFallback(ptr, size);
149   }
ReadString(const char * ptr,int size,std::string * s)150   PROTOBUF_MUST_USE_RESULT const char* ReadString(const char* ptr, int size,
151                                                   std::string* s) {
152     if (size <= buffer_end_ + kSlopBytes - ptr) {
153       s->assign(ptr, size);
154       return ptr + size;
155     }
156     return ReadStringFallback(ptr, size, s);
157   }
AppendString(const char * ptr,int size,std::string * s)158   PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr, int size,
159                                                     std::string* s) {
160     if (size <= buffer_end_ + kSlopBytes - ptr) {
161       s->append(ptr, size);
162       return ptr + size;
163     }
164     return AppendStringFallback(ptr, size, s);
165   }
166 
167   template <typename Tag, typename T>
168   PROTOBUF_MUST_USE_RESULT const char* ReadRepeatedFixed(const char* ptr,
169                                                          Tag expected_tag,
170                                                          RepeatedField<T>* out);
171 
172   template <typename T>
173   PROTOBUF_MUST_USE_RESULT const char* ReadPackedFixed(const char* ptr,
174                                                        int size,
175                                                        RepeatedField<T>* out);
176   template <typename Add>
177   PROTOBUF_MUST_USE_RESULT const char* ReadPackedVarint(const char* ptr,
178                                                         Add add);
179 
LastTag()180   uint32 LastTag() const { return last_tag_minus_1_ + 1; }
ConsumeEndGroup(uint32 start_tag)181   bool ConsumeEndGroup(uint32 start_tag) {
182     bool res = last_tag_minus_1_ == start_tag;
183     last_tag_minus_1_ = 0;
184     return res;
185   }
EndedAtLimit()186   bool EndedAtLimit() const { return last_tag_minus_1_ == 0; }
EndedAtEndOfStream()187   bool EndedAtEndOfStream() const { return last_tag_minus_1_ == 1; }
SetLastTag(uint32 tag)188   void SetLastTag(uint32 tag) { last_tag_minus_1_ = tag - 1; }
SetEndOfStream()189   void SetEndOfStream() { last_tag_minus_1_ = 1; }
IsExceedingLimit(const char * ptr)190   bool IsExceedingLimit(const char* ptr) {
191     return ptr > limit_end_ &&
192            (next_chunk_ == nullptr || ptr - buffer_end_ > limit_);
193   }
194   // Returns true if more data is available, if false is returned one has to
195   // call Done for further checks.
DataAvailable(const char * ptr)196   bool DataAvailable(const char* ptr) { return ptr < limit_end_; }
197 
198  protected:
199   // Returns true is limit (either an explicit limit or end of stream) is
200   // reached. It aligns *ptr across buffer seams.
201   // If limit is exceeded it returns true and ptr is set to null.
DoneWithCheck(const char ** ptr,int d)202   bool DoneWithCheck(const char** ptr, int d) {
203     GOOGLE_DCHECK(*ptr);
204     if (PROTOBUF_PREDICT_TRUE(*ptr < limit_end_)) return false;
205     // No need to fetch buffer if we ended on a limit in the slop region
206     if ((*ptr - buffer_end_) == limit_) return true;
207     auto res = DoneFallback(*ptr, d);
208     *ptr = res.first;
209     return res.second;
210   }
211 
InitFrom(StringPiece flat)212   const char* InitFrom(StringPiece flat) {
213     overall_limit_ = 0;
214     if (flat.size() > kSlopBytes) {
215       limit_ = kSlopBytes;
216       limit_end_ = buffer_end_ = flat.end() - kSlopBytes;
217       next_chunk_ = buffer_;
218       if (aliasing_ == kOnPatch) aliasing_ = kNoDelta;
219       return flat.begin();
220     } else {
221       std::memcpy(buffer_, flat.begin(), flat.size());
222       limit_ = 0;
223       limit_end_ = buffer_end_ = buffer_ + flat.size();
224       next_chunk_ = nullptr;
225       if (aliasing_ == kOnPatch) {
226         aliasing_ = reinterpret_cast<std::uintptr_t>(flat.data()) -
227                     reinterpret_cast<std::uintptr_t>(buffer_);
228       }
229       return buffer_;
230     }
231   }
232 
233   const char* InitFrom(io::ZeroCopyInputStream* zcis);
234 
InitFrom(io::ZeroCopyInputStream * zcis,int limit)235   const char* InitFrom(io::ZeroCopyInputStream* zcis, int limit) {
236     overall_limit_ = limit;
237     auto res = InitFrom(zcis);
238     limit_ = limit - static_cast<int>(buffer_end_ - res);
239     limit_end_ = buffer_end_ + (std::min)(0, limit_);
240     return res;
241   }
242 
243  private:
244   const char* limit_end_;  // buffer_end_ + min(limit_, 0)
245   const char* buffer_end_;
246   const char* next_chunk_;
247   int size_;
248   int limit_;  // relative to buffer_end_;
249   io::ZeroCopyInputStream* zcis_ = nullptr;
250   char buffer_[2 * kSlopBytes] = {};
251   enum { kNoAliasing = 0, kOnPatch = 1, kNoDelta = 2 };
252   std::uintptr_t aliasing_ = kNoAliasing;
253   // This variable is used to communicate how the parse ended, in order to
254   // completely verify the parsed data. A wire-format parse can end because of
255   // one of the following conditions:
256   // 1) A parse can end on a pushed limit.
257   // 2) A parse can end on End Of Stream (EOS).
258   // 3) A parse can end on 0 tag (only valid for toplevel message).
259   // 4) A parse can end on an end-group tag.
260   // This variable should always be set to 0, which indicates case 1. If the
261   // parse terminated due to EOS (case 2), it's set to 1. In case the parse
262   // ended due to a terminating tag (case 3 and 4) it's set to (tag - 1).
263   // This var doesn't really belong in EpsCopyInputStream and should be part of
264   // the ParseContext, but case 2 is most easily and optimally implemented in
265   // DoneFallback.
266   uint32 last_tag_minus_1_ = 0;
267   int overall_limit_ = INT_MAX;  // Overall limit independent of pushed limits.
268 
269   std::pair<const char*, bool> DoneFallback(const char* ptr, int d);
270   const char* Next(int overrun, int d);
271   const char* SkipFallback(const char* ptr, int size);
272   const char* AppendStringFallback(const char* ptr, int size, std::string* str);
273   const char* ReadStringFallback(const char* ptr, int size, std::string* str);
274 
275   template <typename A>
AppendSize(const char * ptr,int size,const A & append)276   const char* AppendSize(const char* ptr, int size, const A& append) {
277     int chunk_size = buffer_end_ + kSlopBytes - ptr;
278     do {
279       GOOGLE_DCHECK(size > chunk_size);
280       append(ptr, chunk_size);
281       ptr += chunk_size;
282       size -= chunk_size;
283       // DoneFallBack asserts it isn't called when exactly on the limit. If this
284       // happens we fail the parse, as we are at the limit and still more bytes
285       // to read.
286       if (limit_ == kSlopBytes) return nullptr;
287       auto res = DoneFallback(ptr, -1);
288       if (res.second) return nullptr;  // If done we passed the limit
289       ptr = res.first;
290       chunk_size = buffer_end_ + kSlopBytes - ptr;
291     } while (size > chunk_size);
292     append(ptr, size);
293     return ptr + size;
294   }
295 
296   // AppendUntilEnd appends data until a limit (either a PushLimit or end of
297   // stream. Normal payloads are from length delimited fields which have an
298   // explicit size. Reading until limit only comes when the string takes
299   // the place of a protobuf, ie RawMessage/StringRawMessage, lazy fields and
300   // implicit weak messages. We keep these methods private and friend them.
301   template <typename A>
AppendUntilEnd(const char * ptr,const A & append)302   const char* AppendUntilEnd(const char* ptr, const A& append) {
303     while (!DoneWithCheck(&ptr, -1)) {
304       append(ptr, limit_end_ - ptr);
305       ptr = limit_end_;
306     }
307     return ptr;
308   }
309 
AppendString(const char * ptr,std::string * str)310   PROTOBUF_MUST_USE_RESULT const char* AppendString(const char* ptr,
311                                                     std::string* str) {
312     return AppendUntilEnd(
313         ptr, [str](const char* p, ptrdiff_t s) { str->append(p, s); });
314   }
315   friend class ImplicitWeakMessage;
316 };
317 
318 // ParseContext holds all data that is global to the entire parse. Most
319 // importantly it contains the input stream, but also recursion depth and also
320 // stores the end group tag, in case a parser ended on a endgroup, to verify
321 // matching start/end group tags.
322 class PROTOBUF_EXPORT ParseContext : public EpsCopyInputStream {
323  public:
324   struct Data {
325     const DescriptorPool* pool = nullptr;
326     MessageFactory* factory = nullptr;
327   };
328 
329   template <typename... T>
ParseContext(int depth,bool aliasing,const char ** start,T &&...args)330   ParseContext(int depth, bool aliasing, const char** start, T&&... args)
331       : EpsCopyInputStream(aliasing), depth_(depth) {
332     *start = InitFrom(std::forward<T>(args)...);
333   }
334 
TrackCorrectEnding()335   void TrackCorrectEnding() { group_depth_ = 0; }
336 
Done(const char ** ptr)337   bool Done(const char** ptr) { return DoneWithCheck(ptr, group_depth_); }
DoneNoSlopCheck(const char ** ptr)338   bool DoneNoSlopCheck(const char** ptr) { return DoneWithCheck(ptr, -1); }
339 
depth()340   int depth() const { return depth_; }
341 
data()342   Data& data() { return data_; }
data()343   const Data& data() const { return data_; }
344 
345   template <typename T>
346   PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseMessage(
347       T* msg, const char* ptr);
348   // We outline when the type is generic and we go through a virtual
349   const char* ParseMessage(MessageLite* msg, const char* ptr);
350   const char* ParseMessage(Message* msg, const char* ptr);
351 
352   template <typename T>
ParseGroup(T * msg,const char * ptr,uint32 tag)353   PROTOBUF_MUST_USE_RESULT PROTOBUF_ALWAYS_INLINE const char* ParseGroup(
354       T* msg, const char* ptr, uint32 tag) {
355     if (--depth_ < 0) return nullptr;
356     group_depth_++;
357     ptr = msg->_InternalParse(ptr, this);
358     group_depth_--;
359     depth_++;
360     if (PROTOBUF_PREDICT_FALSE(!ConsumeEndGroup(tag))) return nullptr;
361     return ptr;
362   }
363 
364  private:
365   // The context keeps an internal stack to keep track of the recursive
366   // part of the parse state.
367   // Current depth of the active parser, depth counts down.
368   // This is used to limit recursion depth (to prevent overflow on malicious
369   // data), but is also used to index in stack_ to store the current state.
370   int depth_;
371   // Unfortunately necessary for the fringe case of ending on 0 or end-group tag
372   // in the last kSlopBytes of a ZeroCopyInputStream chunk.
373   int group_depth_ = INT_MIN;
374   Data data_;
375 };
376 
// Reads a T from p without requiring p to be aligned for T: the bytes are
// copied into a local object rather than dereferenced in place.
template <typename T>
T UnalignedLoad(const void* p) {
  T value;
  std::memcpy(&value, p, sizeof(value));
  return value;
}
383 
384 // TODO(gerbens) Experiment with best implementation.
385 // Clang unrolls loop and generating pretty good code on O2, gcc doesn't.
386 // Unclear if we want 64 bit parse loop unrolled, inlined or opaque function
387 // call. Hence experimentation is needed.
388 // Important guarantee is that it doesn't read more than size bytes from p.
389 template <int size, typename T>
VarintParse(const char * p,T * out)390 PROTOBUF_MUST_USE_RESULT const char* VarintParse(const char* p, T* out) {
391   T res = 1;
392   for (int i = 0; i < size; i++) {
393     T byte = static_cast<uint8>(p[i]);
394     res += (byte - 1) << (i * 7);
395     int j = i + 1;
396     if (PROTOBUF_PREDICT_TRUE(byte < 128)) {
397       *out = res;
398       return p + j;
399     }
400   }
401   *out = 0;
402   return nullptr;
403 }
404 
405 // Decode 2 consecutive bytes of a varint and returns the value, shifted left
406 // by 1. It simultaneous updates *ptr to *ptr + 1 or *ptr + 2 depending if the
407 // first byte's continuation bit is set.
408 // If bit 15 of return value is set (equivalent to the continuation bits of both
409 // bytes being set) the varint continues, otherwise the parse is done. On x86
410 // movsx eax, dil
411 // add edi, eax
412 // adc [rsi], 1
413 // add eax, eax
414 // and eax, edi
DecodeTwoBytes(uint32 value,const char ** ptr)415 inline uint32 DecodeTwoBytes(uint32 value, const char** ptr) {
416   // Sign extend the low byte continuation bit
417   uint32_t x = static_cast<int8_t>(value);
418   // This add is an amazing operation, it cancels the low byte continuation bit
419   // from y transferring it to the carry. Simultaneously it also shifts the 7
420   // LSB left by one tightly against high byte varint bits. Hence value now
421   // contains the unpacked value shifted left by 1.
422   value += x;
423   // Use the carry to update the ptr appropriately.
424   *ptr += value < x ? 2 : 1;
425   return value & (x + x);  // Mask out the high byte iff no continuation
426 }
427 
428 // Used for tags, could read up to 5 bytes which must be available.
429 // Caller must ensure its safe to call.
430 
431 std::pair<const char*, uint32> ReadTagFallback(const char* p, uint32 res);
432 
ReadTag(const char * p,uint32 * out)433 inline const char* ReadTag(const char* p, uint32* out) {
434   uint32 res = static_cast<uint8>(p[0]);
435   if (res < 128) {
436     *out = res;
437     return p + 1;
438   }
439   uint32 second = static_cast<uint8>(p[1]);
440   res += (second - 1) << 7;
441   if (second < 128) {
442     *out = res;
443     return p + 2;
444   }
445   auto tmp = ReadTagFallback(p + 2, res);
446   *out = tmp.second;
447   return tmp.first;
448 }
449 
450 // Will preload the next 2 bytes
ReadTag(const char * p,uint32 * out,uint32 * preload)451 inline const char* ReadTag(const char* p, uint32* out, uint32* preload) {
452   uint32 res = static_cast<uint8>(p[0]);
453   if (res < 128) {
454     *out = res;
455     *preload = UnalignedLoad<uint16>(p + 1);
456     return p + 1;
457   }
458   uint32 second = static_cast<uint8>(p[1]);
459   res += (second - 1) << 7;
460   if (second < 128) {
461     *out = res;
462     *preload = UnalignedLoad<uint16>(p + 2);
463     return p + 2;
464   }
465   auto tmp = ReadTagFallback(p + 2, res);
466   *out = tmp.second;
467   return tmp.first;
468 }
469 
ParseVarint64FallbackInline(const char * p,uint64 res)470 inline std::pair<const char*, uint64> ParseVarint64FallbackInline(const char* p,
471                                                                   uint64 res) {
472   res >>= 1;
473   for (std::uint32_t i = 0; i < 4; i++) {
474     auto pnew = p + 2 * i;
475     auto tmp = DecodeTwoBytes(UnalignedLoad<uint16>(pnew), &pnew);
476     res += (static_cast<std::uint64_t>(tmp) - 2) << (14 * (i + 1) - 1);
477     if (PROTOBUF_PREDICT_TRUE(std::int16_t(tmp) >= 0)) {
478       return {pnew, res};
479     }
480   }
481   return {nullptr, res};
482 }
483 
ParseVarint64Inline(const char * p,uint64 * out)484 inline const char* ParseVarint64Inline(const char* p, uint64* out) {
485   auto tmp = DecodeTwoBytes(UnalignedLoad<uint16>(p), &p);
486   if (PROTOBUF_PREDICT_TRUE(static_cast<int16>(tmp) >= 0)) {
487     *out = tmp >> 1;
488     return p;
489   }
490   auto x = ParseVarint64FallbackInline(p, tmp);
491   *out = x.second;
492   return x.first;
493 }
494 
495 std::pair<const char*, uint64> ParseVarint64Fallback(const char* p, uint64 res);
496 
ParseVarint64(const char * p,uint32 preload,uint64 * out)497 inline const char* ParseVarint64(const char* p, uint32 preload, uint64* out) {
498   auto tmp = DecodeTwoBytes(preload, &p);
499   if (PROTOBUF_PREDICT_TRUE(static_cast<int16>(tmp) >= 0)) {
500     *out = tmp >> 1;
501     return p;
502   }
503   auto x = ParseVarint64Fallback(p, tmp);
504   *out = x.second;
505   return x.first;
506 }
507 
508 // Used for reading varint wiretype values, could read up to 10 bytes.
509 // Caller must ensure its safe to call.
ParseVarint64(const char * p,uint64 * out)510 inline const char* ParseVarint64(const char* p, uint64* out) {
511   return ParseVarint64(p, UnalignedLoad<uint16>(p), out);
512 }
513 
514 std::pair<const char*, int32> ReadSizeFallback(const char* p, uint32 first);
515 // Used for tags, could read up to 5 bytes which must be available. Additionally
516 // it makes sure the unsigned value fits a int32, otherwise returns nullptr.
517 // Caller must ensure its safe to call.
ReadSize(const char ** pp)518 inline uint32 ReadSize(const char** pp) {
519   auto p = *pp;
520   uint32 res = static_cast<uint8>(p[0]);
521   if (res < 128) {
522     *pp = p + 1;
523     return res;
524   }
525   auto x = ReadSizeFallback(p, res);
526   *pp = x.first;
527   return x.second;
528 }
529 
530 // Some convenience functions to simplify the generated parse loop code.
531 // Returning the value and updating the buffer pointer allows for nicer
532 // function composition. We rely on the compiler to inline this.
533 // Also in debug compiles having local scoped variables tend to generated
534 // stack frames that scale as O(num fields).
ReadVarint(const char ** p)535 inline uint64 ReadVarint(const char** p) {
536   uint64 tmp;
537   *p = ParseVarint64(*p, &tmp);
538   return tmp;
539 }
540 
ReadVarintZigZag64(const char ** p)541 inline int64 ReadVarintZigZag64(const char** p) {
542   uint64 tmp;
543   *p = ParseVarint64(*p, &tmp);
544   return WireFormatLite::ZigZagDecode64(tmp);
545 }
546 
ReadVarintZigZag32(const char ** p)547 inline int32 ReadVarintZigZag32(const char** p) {
548   uint64 tmp;
549   *p = ParseVarint64(*p, &tmp);
550   return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
551 }
552 
ReadVarint(const char ** p,uint32 preload)553 inline uint64 ReadVarint(const char** p, uint32 preload) {
554   uint64 tmp;
555   *p = ParseVarint64(*p, preload, &tmp);
556   return tmp;
557 }
558 
ReadVarintZigZag64(const char ** p,uint32 preload)559 inline int64 ReadVarintZigZag64(const char** p, uint32 preload) {
560   uint64 tmp;
561   *p = ParseVarint64(*p, preload, &tmp);
562   return WireFormatLite::ZigZagDecode64(tmp);
563 }
564 
ReadVarintZigZag32(const char ** p,uint32 preload)565 inline int32 ReadVarintZigZag32(const char** p, uint32 preload) {
566   uint64 tmp;
567   *p = ParseVarint64(*p, preload, &tmp);
568   return WireFormatLite::ZigZagDecode32(static_cast<uint32>(tmp));
569 }
570 
571 template <typename T>
ParseMessage(T * msg,const char * ptr)572 PROTOBUF_MUST_USE_RESULT const char* ParseContext::ParseMessage(
573     T* msg, const char* ptr) {
574   int size = ReadSize(&ptr);
575   if (!ptr) return nullptr;
576   auto old = PushLimit(ptr, size);
577   if (--depth_ < 0) return nullptr;
578   ptr = msg->_InternalParse(ptr, this);
579   if (PROTOBUF_PREDICT_FALSE(ptr == nullptr)) return nullptr;
580   depth_++;
581   if (!PopLimit(old)) return nullptr;
582   return ptr;
583 }
584 
585 template <typename Add>
ReadPackedVarint(const char * ptr,Add add)586 const char* EpsCopyInputStream::ReadPackedVarint(const char* ptr, Add add) {
587   int size = ReadSize(&ptr);
588   if (ptr == nullptr) return nullptr;
589   auto old = PushLimit(ptr, size);
590   if (old < 0) return nullptr;
591   while (!DoneWithCheck(&ptr, -1)) {
592     uint64 varint;
593     ptr = ParseVarint64(ptr, &varint);
594     if (!ptr) return nullptr;
595     add(varint);
596   }
597   if (!PopLimit(old)) return nullptr;
598   return ptr;
599 }
600 
// Helper for verification of utf8
// NOTE(review): field_name is presumably only used for diagnostics; confirm
// against the definition.
PROTOBUF_EXPORT
bool VerifyUTF8(StringPiece s, const char* field_name);

// All the string parsers with or without UTF checking and for all CTypes.
// Each takes the destination string, the current read position and the parse
// context, and returns a position (declared MUST_USE_RESULT, so callers are
// expected to check it).
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParser(
    std::string* s, const char* ptr, ParseContext* ctx);

PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char*
InlineGreedyStringParserUTF8(std::string* s, const char* ptr, ParseContext* ctx,
                             const char* field_name);
612 // Inline because we don't want to pay the price of field_name in opt mode.
InlineGreedyStringParserUTF8Verify(std::string * s,const char * ptr,ParseContext * ctx,const char * field_name)613 inline PROTOBUF_MUST_USE_RESULT const char* InlineGreedyStringParserUTF8Verify(
614     std::string* s, const char* ptr, ParseContext* ctx,
615     const char* field_name) {
616   auto p = InlineGreedyStringParser(s, ptr, ctx);
617 #ifndef NDEBUG
618   VerifyUTF8(*s, field_name);
619 #endif  // !NDEBUG
620   return p;
621 }
622 
623 
// Uncomment either of the commented-out lines inside the macro below to debug
// which parse function is failing.
//
// GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) makes the enclosing function
// return `ret` when `predicate` is false. It expands to a bare `if` block (no
// do/while wrapper), so use it only as a complete statement of its own, never
// as the unbraced body of another if/else.

#define GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, ret) \
  if (!(predicate)) {                                  \
    /*  ::raise(SIGINT);  */                           \
    /*  GOOGLE_LOG(ERROR) << "Parse failure";  */             \
    return ret;                                        \
  }

// Parser flavour of the macro above: returns nullptr from the enclosing
// function when `predicate` is false.
#define GOOGLE_PROTOBUF_PARSER_ASSERT(predicate) \
  GOOGLE_PROTOBUF_ASSERT_RETURN(predicate, nullptr)
635 
636 template <typename T>
FieldParser(uint64 tag,T & field_parser,const char * ptr,ParseContext * ctx)637 PROTOBUF_MUST_USE_RESULT const char* FieldParser(uint64 tag, T& field_parser,
638                                                  const char* ptr,
639                                                  ParseContext* ctx) {
640   uint32 number = tag >> 3;
641   GOOGLE_PROTOBUF_PARSER_ASSERT(number != 0);
642   using WireType = internal::WireFormatLite::WireType;
643   switch (tag & 7) {
644     case WireType::WIRETYPE_VARINT: {
645       uint64 value;
646       ptr = ParseVarint64(ptr, &value);
647       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
648       field_parser.AddVarint(number, value);
649       break;
650     }
651     case WireType::WIRETYPE_FIXED64: {
652       uint64 value = UnalignedLoad<uint64>(ptr);
653       ptr += 8;
654       field_parser.AddFixed64(number, value);
655       break;
656     }
657     case WireType::WIRETYPE_LENGTH_DELIMITED: {
658       ptr = field_parser.ParseLengthDelimited(number, ptr, ctx);
659       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
660       break;
661     }
662     case WireType::WIRETYPE_START_GROUP: {
663       ptr = field_parser.ParseGroup(number, ptr, ctx);
664       GOOGLE_PROTOBUF_PARSER_ASSERT(ptr);
665       break;
666     }
667     case WireType::WIRETYPE_END_GROUP: {
668       GOOGLE_LOG(FATAL) << "Can't happen";
669       break;
670     }
671     case WireType::WIRETYPE_FIXED32: {
672       uint32 value = UnalignedLoad<uint32>(ptr);
673       ptr += 4;
674       field_parser.AddFixed32(number, value);
675       break;
676     }
677     default:
678       return nullptr;
679   }
680   return ptr;
681 }
682 
683 template <typename T>
WireFormatParser(T & field_parser,const char * ptr,ParseContext * ctx)684 PROTOBUF_MUST_USE_RESULT const char* WireFormatParser(T& field_parser,
685                                                       const char* ptr,
686                                                       ParseContext* ctx) {
687   while (!ctx->Done(&ptr)) {
688     uint32 tag;
689     ptr = ReadTag(ptr, &tag);
690     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
691     if (tag == 0 || (tag & 7) == 4) {
692       ctx->SetLastTag(tag);
693       return ptr;
694     }
695     ptr = FieldParser(tag, field_parser, ptr, ctx);
696     GOOGLE_PROTOBUF_PARSER_ASSERT(ptr != nullptr);
697   }
698   return ptr;
699 }
700 
// The packed parsers parse repeated numeric primitives directly into the
// corresponding field.
// All of them share the shape (object, ptr, ctx) -> position and are declared
// MUST_USE_RESULT, so callers are expected to check the returned position.
// NOTE(review): `object` is type-erased (void*); presumably it points at the
// RepeatedField of the matching element type - confirm against the
// definitions.

// These are packed varints
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedUInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSInt64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx);
// Enum variants that additionally receive a validity predicate, the message
// metadata and the field number.
// NOTE(review): values rejected by is_valid are presumably recorded in
// `metadata` under field_num - confirm against the definitions.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParser(
    void* object, const char* ptr, ParseContext* ctx, bool (*is_valid)(int),
    InternalMetadataWithArenaLite* metadata, int field_num);
// Same as above, with a predicate that takes an extra opaque `data` argument.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedEnumParserArg(
    void* object, const char* ptr, ParseContext* ctx,
    bool (*is_valid)(const void*, int), const void* data,
    InternalMetadataWithArenaLite* metadata, int field_num);

// Bool and the fixed-width / floating point element types.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedBoolParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed32Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedSFixed64Parser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedFloatParser(
    void* object, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* PackedDoubleParser(
    void* object, const char* ptr, ParseContext* ctx);
741 
// This is the only recursive parser.
// Takes the std::string that collects unknown data, the current position and
// the parse context; returns a position that callers must check.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownGroupLiteParse(
    std::string* unknown, const char* ptr, ParseContext* ctx);
// This is a helper for UnknownGroupLiteParse but is actually also useful in
// the generated code. It is overloaded on the destination type so that the
// generated code is isomorphic between full and lite. The overloads declared
// here take a std::string* or an InternalMetadataWithArenaLite*.
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32 tag, std::string* unknown, const char* ptr, ParseContext* ctx);
PROTOBUF_EXPORT PROTOBUF_MUST_USE_RESULT const char* UnknownFieldParse(
    uint32 tag, InternalMetadataWithArenaLite* metadata, const char* ptr,
    ParseContext* ctx);
753 
754 }  // namespace internal
755 }  // namespace protobuf
756 }  // namespace google
757 
758 #include <google/protobuf/port_undef.inc>
759 
760 #endif  // GOOGLE_PROTOBUF_PARSE_CONTEXT_H__
761