1 /*
2 ** upb::pb::Decoder
3 **
4 ** A high performance, streaming, resumable decoder for the binary protobuf
5 ** format.
6 **
7 ** This interface works the same regardless of what decoder backend is being
8 ** used.  A client of this class does not need to know whether decoding is using
9 ** a JITted decoder (DynASM, LLVM, etc) or an interpreted decoder.  By default,
10 ** it will always use the fastest available decoder.  However, you can call
11 ** set_allow_jit(false) to disable any JIT decoder that might be available.
12 ** This is primarily useful for testing purposes.
13 */
14 
15 #ifndef UPB_DECODER_H_
16 #define UPB_DECODER_H_
17 
18 #include "upb/sink.h"
19 
20 #ifdef __cplusplus
21 namespace upb {
22 namespace pb {
23 class CodeCache;
24 class DecoderPtr;
25 class DecoderMethodPtr;
26 class DecoderMethodOptions;
27 }  /* namespace pb */
28 }  /* namespace upb */
29 #endif
30 
/* The maximum number of bytes we are required to buffer internally between
 * calls to the decoder.  The value is 14: a 5-byte unknown tag plus a 10-byte
 * varint, less one because we are buffering an incomplete value.
 *
 * Should only be used by unit tests. */
36 #define UPB_DECODER_MAX_RESIDUAL_BYTES 14
37 
38 /* upb_pbdecodermethod ********************************************************/
39 
40 struct upb_pbdecodermethod;
41 typedef struct upb_pbdecodermethod upb_pbdecodermethod;
42 
43 #ifdef __cplusplus
44 extern "C" {
45 #endif
46 
47 const upb_handlers *upb_pbdecodermethod_desthandlers(
48     const upb_pbdecodermethod *m);
49 const upb_byteshandler *upb_pbdecodermethod_inputhandler(
50     const upb_pbdecodermethod *m);
51 bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m);
52 
53 #ifdef __cplusplus
54 }  /* extern "C" */
55 
56 /* Represents the code to parse a protobuf according to a destination
57  * Handlers. */
58 class upb::pb::DecoderMethodPtr {
59  public:
DecoderMethodPtr()60   DecoderMethodPtr() : ptr_(nullptr) {}
DecoderMethodPtr(const upb_pbdecodermethod * ptr)61   DecoderMethodPtr(const upb_pbdecodermethod* ptr) : ptr_(ptr) {}
62 
ptr()63   const upb_pbdecodermethod* ptr() { return ptr_; }
64 
65   /* The destination handlers that are statically bound to this method.
66    * This method is only capable of outputting to a sink that uses these
67    * handlers. */
dest_handlers()68   const Handlers *dest_handlers() const {
69     return upb_pbdecodermethod_desthandlers(ptr_);
70   }
71 
72   /* The input handlers for this decoder method. */
input_handler()73   const BytesHandler* input_handler() const {
74     return upb_pbdecodermethod_inputhandler(ptr_);
75   }
76 
77   /* Whether this method is native. */
is_native()78   bool is_native() const {
79     return upb_pbdecodermethod_isnative(ptr_);
80   }
81 
82  private:
83   const upb_pbdecodermethod* ptr_;
84 };
85 
86 #endif
87 
88 /* upb_pbdecoder **************************************************************/
89 
90 /* Preallocation hint: decoder won't allocate more bytes than this when first
91  * constructed.  This hint may be an overestimate for some build configurations.
92  * But if the decoder library is upgraded without recompiling the application,
93  * it may be an underestimate. */
94 #define UPB_PB_DECODER_SIZE 4416
95 
96 struct upb_pbdecoder;
97 typedef struct upb_pbdecoder upb_pbdecoder;
98 
99 #ifdef __cplusplus
100 extern "C" {
101 #endif
102 
103 upb_pbdecoder *upb_pbdecoder_create(upb_arena *arena,
104                                     const upb_pbdecodermethod *method,
105                                     upb_sink output, upb_status *status);
106 const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d);
107 upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d);
108 uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d);
109 size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d);
110 bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max);
111 void upb_pbdecoder_reset(upb_pbdecoder *d);
112 
113 #ifdef __cplusplus
114 }  /* extern "C" */
115 
116 /* A Decoder receives binary protobuf data on its input sink and pushes the
117  * decoded data to its output sink. */
118 class upb::pb::DecoderPtr {
119  public:
DecoderPtr()120   DecoderPtr() : ptr_(nullptr) {}
DecoderPtr(upb_pbdecoder * ptr)121   DecoderPtr(upb_pbdecoder* ptr) : ptr_(ptr) {}
122 
ptr()123   upb_pbdecoder* ptr() { return ptr_; }
124 
125   /* Constructs a decoder instance for the given method, which must outlive this
126    * decoder.  Any errors during parsing will be set on the given status, which
127    * must also outlive this decoder.
128    *
129    * The sink must match the given method. */
Create(Arena * arena,DecoderMethodPtr method,upb::Sink output,Status * status)130   static DecoderPtr Create(Arena *arena, DecoderMethodPtr method,
131                            upb::Sink output, Status *status) {
132     return DecoderPtr(upb_pbdecoder_create(arena->ptr(), method.ptr(),
133                                            output.sink(), status->ptr()));
134   }
135 
136   /* Returns the DecoderMethod this decoder is parsing from. */
method()137   const DecoderMethodPtr method() const {
138     return DecoderMethodPtr(upb_pbdecoder_method(ptr_));
139   }
140 
141   /* The sink on which this decoder receives input. */
input()142   BytesSink input() { return BytesSink(upb_pbdecoder_input(ptr())); }
143 
144   /* Returns number of bytes successfully parsed.
145    *
146    * This can be useful for determining the stream position where an error
147    * occurred.
148    *
149    * This value may not be up-to-date when called from inside a parsing
150    * callback. */
BytesParsed()151   uint64_t BytesParsed() { return upb_pbdecoder_bytesparsed(ptr()); }
152 
153   /* Gets/sets the parsing nexting limit.  If the total number of nested
154    * submessages and repeated fields hits this limit, parsing will fail.  This
155    * is a resource limit that controls the amount of memory used by the parsing
156    * stack.
157    *
158    * Setting the limit will fail if the parser is currently suspended at a depth
159    * greater than this, or if memory allocation of the stack fails. */
max_nesting()160   size_t max_nesting() { return upb_pbdecoder_maxnesting(ptr()); }
set_max_nesting(size_t max)161   bool set_max_nesting(size_t max) {
162     return upb_pbdecoder_setmaxnesting(ptr(), max);
163   }
164 
Reset()165   void Reset() { upb_pbdecoder_reset(ptr()); }
166 
167   static const size_t kSize = UPB_PB_DECODER_SIZE;
168 
169  private:
170   upb_pbdecoder *ptr_;
171 };
172 
173 #endif  /* __cplusplus */
174 
175 /* upb_pbcodecache ************************************************************/
176 
177 /* Lazily builds and caches decoder methods that will push data to the given
178  * handlers.  The destination handlercache must outlive this object. */
179 
180 struct upb_pbcodecache;
181 typedef struct upb_pbcodecache upb_pbcodecache;
182 
183 #ifdef __cplusplus
184 extern "C" {
185 #endif
186 
187 upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest);
188 void upb_pbcodecache_free(upb_pbcodecache *c);
189 bool upb_pbcodecache_allowjit(const upb_pbcodecache *c);
190 void upb_pbcodecache_setallowjit(upb_pbcodecache *c, bool allow);
191 void upb_pbcodecache_setlazy(upb_pbcodecache *c, bool lazy);
192 const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
193                                                const upb_msgdef *md);
194 
195 #ifdef __cplusplus
196 }  /* extern "C" */
197 
198 /* A class for caching protobuf processing code, whether bytecode for the
199  * interpreted decoder or machine code for the JIT.
200  *
201  * This class is not thread-safe. */
202 class upb::pb::CodeCache {
203  public:
CodeCache(upb::HandlerCache * dest)204   CodeCache(upb::HandlerCache *dest)
205       : ptr_(upb_pbcodecache_new(dest->ptr()), upb_pbcodecache_free) {}
206   CodeCache(CodeCache&&) = default;
207   CodeCache& operator=(CodeCache&&) = default;
208 
ptr()209   upb_pbcodecache* ptr() { return ptr_.get(); }
ptr()210   const upb_pbcodecache* ptr() const { return ptr_.get(); }
211 
212   /* Whether the cache is allowed to generate machine code.  Defaults to true.
213    * There is no real reason to turn it off except for testing or if you are
214    * having a specific problem with the JIT.
215    *
216    * Note that allow_jit = true does not *guarantee* that the code will be JIT
217    * compiled.  If this platform is not supported or the JIT was not compiled
218    * in, the code may still be interpreted. */
allow_jit()219   bool allow_jit() const { return upb_pbcodecache_allowjit(ptr()); }
220 
221   /* This may only be called when the object is first constructed, and prior to
222    * any code generation. */
set_allow_jit(bool allow)223   void set_allow_jit(bool allow) { upb_pbcodecache_setallowjit(ptr(), allow); }
224 
225   /* Should the decoder push submessages to lazy handlers for fields that have
226    * them?  The caller should set this iff the lazy handlers expect data that is
227    * in protobuf binary format and the caller wishes to lazy parse it. */
set_lazy(bool lazy)228   void set_lazy(bool lazy) { upb_pbcodecache_setlazy(ptr(), lazy); }
229 
230   /* Returns a DecoderMethod that can push data to the given handlers.
231    * If a suitable method already exists, it will be returned from the cache. */
Get(MessageDefPtr md)232   const DecoderMethodPtr Get(MessageDefPtr md) {
233     return DecoderMethodPtr(upb_pbcodecache_get(ptr(), md.ptr()));
234   }
235 
236  private:
237   std::unique_ptr<upb_pbcodecache, decltype(&upb_pbcodecache_free)> ptr_;
238 };
239 
240 #endif  /* __cplusplus */
241 
242 #endif  /* UPB_DECODER_H_ */
243