1 /*
2 * Copyright (c) 2009-2021, Google LLC
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of Google LLC nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 /*
29 * Internal implementation details of the decoder that are shared between
30 * decode.c and decode_fast.c.
31 */
32
33 #ifndef UPB_DECODE_INT_H_
34 #define UPB_DECODE_INT_H_
35
36 #include <setjmp.h>
37
38 #include "upb/msg_internal.h"
39 #include "upb/upb_internal.h"
40
41 /* Must be last. */
42 #include "upb/port_def.inc"
43
/* Sentinel for upb_decstate.end_group meaning "no END_GROUP tag recorded".
 * The expansion is fully parenthesized so that it behaves as a single operand
 * in any context (e.g. `sizeof DECODE_NOGROUP` yields sizeof(uint32_t) rather
 * than parsing as `sizeof(uint32_t) - 1`). */
#define DECODE_NOGROUP ((uint32_t)-1)
45
/* Decoder state threaded through the decoding routines that share this
 * header. */
typedef struct upb_decstate {
  const char *end;         /* Can read up to 16 bytes slop beyond this. */
  const char *limit_ptr;   /* = end + UPB_MIN(limit, 0) */
  upb_msg *unknown_msg;    /* If non-NULL, add unknown data at buffer flip. */
  const char *unknown;     /* Start of unknown data. */
  int limit;               /* Submessage limit relative to end. */
  int depth;               /* NOTE(review): presumably the remaining nesting
                            * budget -- not used in this header, confirm. */
  uint32_t end_group;   /* field number of END_GROUP tag, else DECODE_NOGROUP */
  bool alias;              /* Set to false when input switches to `patch`.
                            * NOTE(review): presumably "may alias the input
                            * buffer" -- confirm against the callers. */
  char patch[32];          /* Scratch buffer: the 16 bytes at `end` are copied
                            * into the first half and the second half is
                            * zeroed before decoding continues from here. */
  upb_arena arena;         /* Passed to _upb_msg_addunknown() when unknown
                            * data is saved. */
  jmp_buf err;             /* NOTE(review): presumably the longjmp() target
                            * used to abort decoding; the setjmp() site is not
                            * in this header. */
} upb_decstate;
59
/* Error function that will abort decoding with longjmp(). We can't declare this
 * UPB_NORETURN, even though it is appropriate, because if we do then compilers
 * will "helpfully" refuse to tailcall to it
 * (see: https://stackoverflow.com/a/55657013), which will defeat a major goal
 * of our optimizations. That is also why we must declare it in a separate file,
 * otherwise the compiler will see that it calls longjmp() and deduce that it is
 * noreturn.
 *
 * Because control leaves via longjmp(), the declared return value is never
 * observed by the caller. NOTE(review): the `const char *` return type
 * presumably exists so that parser functions returning `const char *` can
 * tail-call this function -- confirm against the call sites. */
const char *fastdecode_err(upb_decstate *d);
68
/* Lookup table indexed by a byte value (0-255). decode_verifyutf8_inl() reads
 * the entry for the first byte of a sequence as that sequence's total length
 * in bytes, and treats an entry of 0 as "this byte cannot start a sequence".
 * The table itself is defined outside this header. */
extern const uint8_t upb_utf8_offsets[];
70
71 UPB_INLINE
decode_verifyutf8_inl(const char * buf,int len)72 bool decode_verifyutf8_inl(const char *buf, int len) {
73 int i, j;
74 uint8_t offset;
75
76 i = 0;
77 while (i < len) {
78 offset = upb_utf8_offsets[(uint8_t)buf[i]];
79 if (offset == 0 || i + offset > len) {
80 return false;
81 }
82 for (j = i + 1; j < i + offset; j++) {
83 if ((buf[j] & 0xc0) != 0x80) {
84 return false;
85 }
86 }
87 i += offset;
88 }
89 return i == len;
90 }
91
92 /* x86-64 pointers always have the high 16 bits matching. So we can shift
93 * left 8 and right 8 without loss of information. */
decode_totable(const upb_msglayout * tablep)94 UPB_INLINE intptr_t decode_totable(const upb_msglayout *tablep) {
95 return ((intptr_t)tablep << 8) | tablep->table_mask;
96 }
97
decode_totablep(intptr_t table)98 UPB_INLINE const upb_msglayout *decode_totablep(intptr_t table) {
99 return (const upb_msglayout*)(table >> 8);
100 }
101
102 UPB_INLINE
decode_isdonefallback_inl(upb_decstate * d,const char * ptr,int overrun)103 const char *decode_isdonefallback_inl(upb_decstate *d, const char *ptr,
104 int overrun) {
105 if (overrun < d->limit) {
106 /* Need to copy remaining data into patch buffer. */
107 UPB_ASSERT(overrun < 16);
108 if (d->unknown_msg) {
109 if (!_upb_msg_addunknown(d->unknown_msg, d->unknown, ptr - d->unknown,
110 &d->arena)) {
111 return NULL;
112 }
113 d->unknown = &d->patch[0] + overrun;
114 }
115 memset(d->patch + 16, 0, 16);
116 memcpy(d->patch, d->end, 16);
117 ptr = &d->patch[0] + overrun;
118 d->end = &d->patch[16];
119 d->limit -= 16;
120 d->limit_ptr = d->end + d->limit;
121 d->alias = false;
122 UPB_ASSERT(ptr < d->limit_ptr);
123 return ptr;
124 } else {
125 return NULL;
126 }
127 }
128
/* Out-of-line fallback called by decode_isdone() when *ptr has reached
 * d->limit_ptr without landing exactly on the limit. decode_isdone() stores
 * the returned pointer back into *ptr without checking it.
 * NOTE(review): presumably this wraps decode_isdonefallback_inl() and reports
 * failure itself instead of returning NULL -- the definition is not in this
 * header, confirm there. */
const char *decode_isdonefallback(upb_decstate *d, const char *ptr,
                                  int overrun);
131
132 UPB_INLINE
decode_isdone(upb_decstate * d,const char ** ptr)133 bool decode_isdone(upb_decstate *d, const char **ptr) {
134 int overrun = *ptr - d->end;
135 if (UPB_LIKELY(*ptr < d->limit_ptr)) {
136 return false;
137 } else if (UPB_LIKELY(overrun == d->limit)) {
138 return true;
139 } else {
140 *ptr = decode_isdonefallback(d, *ptr, overrun);
141 return false;
142 }
143 }
144
145 #if UPB_FASTTABLE
146 UPB_INLINE
fastdecode_tagdispatch(upb_decstate * d,const char * ptr,upb_msg * msg,intptr_t table,uint64_t hasbits,uint64_t tag)147 const char *fastdecode_tagdispatch(upb_decstate *d, const char *ptr,
148 upb_msg *msg, intptr_t table,
149 uint64_t hasbits, uint64_t tag) {
150 const upb_msglayout *table_p = decode_totablep(table);
151 uint8_t mask = table;
152 uint64_t data;
153 size_t idx = tag & mask;
154 UPB_ASSUME((idx & 7) == 0);
155 idx >>= 3;
156 data = table_p->fasttable[idx].field_data ^ tag;
157 UPB_MUSTTAIL return table_p->fasttable[idx].field_parser(d, ptr, msg, table,
158 hasbits, data);
159 }
160 #endif
161
fastdecode_loadtag(const char * ptr)162 UPB_INLINE uint32_t fastdecode_loadtag(const char* ptr) {
163 uint16_t tag;
164 memcpy(&tag, ptr, 2);
165 return tag;
166 }
167
decode_checklimit(upb_decstate * d)168 UPB_INLINE void decode_checklimit(upb_decstate *d) {
169 UPB_ASSERT(d->limit_ptr == d->end + UPB_MIN(0, d->limit));
170 }
171
decode_pushlimit(upb_decstate * d,const char * ptr,int size)172 UPB_INLINE int decode_pushlimit(upb_decstate *d, const char *ptr, int size) {
173 int limit = size + (int)(ptr - d->end);
174 int delta = d->limit - limit;
175 decode_checklimit(d);
176 d->limit = limit;
177 d->limit_ptr = d->end + UPB_MIN(0, limit);
178 decode_checklimit(d);
179 return delta;
180 }
181
decode_poplimit(upb_decstate * d,const char * ptr,int saved_delta)182 UPB_INLINE void decode_poplimit(upb_decstate *d, const char *ptr,
183 int saved_delta) {
184 UPB_ASSERT(ptr - d->end == d->limit);
185 decode_checklimit(d);
186 d->limit += saved_delta;
187 d->limit_ptr = d->end + UPB_MIN(0, d->limit);
188 decode_checklimit(d);
189 }
190
191 #include "upb/port_undef.inc"
192
193 #endif /* UPB_DECODE_INT_H_ */
194