1 /* pb_common.c: Common support functions for pb_encode.c and pb_decode.c.
2  *
3  * 2014 Petteri Aimonen <jpa@kapsi.fi>
4  */
5 
6 #include "pb_common.h"
7 
/* Decode the field-info entry at iter->field_info_index and refresh the
 * iterator members derived from it: type, tag, array_size, data_size,
 * pField, pSize, pData and submsg_desc.
 *
 * The two lowest bits of the first word select the entry format
 * (0 = 1-word, 1 = 2-word, 2 = 4-word, 3 = 8-word). In every format
 * bits 2..7 of the first word hold the six lowest tag bits and bits 8..15
 * hold the field type.
 *
 * Returns false, without modifying the iterator, when iter->index is not
 * below the descriptor's field_count. Returns true otherwise.
 */
static bool load_descriptor_values(pb_field_iter_t *iter)
{
    const pb_msgdesc_t *desc = iter->descriptor;
    uint32_t head;
    uint32_t format;
    uint32_t tag;
    uint32_t count;
    uint32_t elem_size;
    uint32_t field_offset;
    int_least8_t count_offset;

    if (iter->index >= desc->field_count)
    {
        return false;
    }

    head = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index]);
    format = head & 3;
    tag = (head >> 2) & 0x3F;
    iter->type = (pb_type_t)((head >> 8) & 0xFF);

    if (format == 0)
    {
        /* 1-word format: everything is packed into the first word. */
        count = 1;
        count_offset = (int_least8_t)((head >> 24) & 0x0F);
        field_offset = (head >> 16) & 0xFF;
        elem_size = (head >> 28) & 0x0F;
    }
    else if (format == 1)
    {
        /* 2-word format */
        uint32_t second = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index + 1]);

        count = (head >> 16) & 0x0FFF;
        tag |= (second >> 28) << 6;
        count_offset = (int_least8_t)((head >> 28) & 0x0F);
        field_offset = second & 0xFFFF;
        elem_size = (second >> 16) & 0x0FFF;
    }
    else
    {
        /* 4-word and 8-word formats use the same layout for words 1..3;
         * they differ only in where the array size is stored. */
        uint32_t second = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index + 1]);
        uint32_t third = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index + 2]);
        uint32_t fourth = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index + 3]);

        tag |= (second >> 8) << 6;
        count_offset = (int_least8_t)(second & 0xFF);
        field_offset = third;
        elem_size = fourth;

        if (format == 2)
        {
            /* 4-word format: array size lives in the upper half of word 0. */
            count = head >> 16;
        }
        else
        {
            /* 8-word format: array size has a word of its own. */
            count = PB_PROGMEM_READU32(desc->field_info[iter->field_info_index + 4]);
        }
    }

    iter->tag = (pb_size_t)tag;
    iter->array_size = (pb_size_t)count;
    iter->data_size = (pb_size_t)elem_size;

    if (iter->message != NULL)
    {
        iter->pField = (char*)iter->message + field_offset;

        if (count_offset != 0)
        {
            /* The size member is located count_offset bytes before the field. */
            iter->pSize = (char*)iter->pField - count_offset;
        }
        else if (PB_HTYPE(iter->type) == PB_HTYPE_REPEATED &&
                 (PB_ATYPE(iter->type) == PB_ATYPE_STATIC ||
                  PB_ATYPE(iter->type) == PB_ATYPE_POINTER))
        {
            /* Fixed count array: the count comes from the descriptor itself. */
            iter->pSize = &iter->array_size;
        }
        else
        {
            iter->pSize = NULL;
        }

        if (PB_ATYPE(iter->type) == PB_ATYPE_POINTER && iter->pField != NULL)
        {
            /* Pointer fields store a pointer to the data inside the message. */
            iter->pData = *(void**)iter->pField;
        }
        else
        {
            iter->pData = iter->pField;
        }
    }
    else
    {
        /* Avoid doing arithmetic on null pointers, it is undefined.
         * Note that pData is left as it was in this case. */
        iter->pField = NULL;
        iter->pSize = NULL;
    }

    iter->submsg_desc = PB_LTYPE_IS_SUBMSG(iter->type)
                      ? desc->submsg_info[iter->submessage_index]
                      : NULL;

    return true;
}
121 
/* Step the iterator's bookkeeping indexes to the next field without
 * decoding that field's descriptor. When the step moves past the last
 * field, all indexes are reset to zero so iteration restarts at the
 * first field.
 */
static void advance_iterator(pb_field_iter_t *iter)
{
    uint32_t left_behind;
    pb_type_t left_type;
    pb_size_t word_count;

    iter->index++;

    if (iter->index >= iter->descriptor->field_count)
    {
        /* Ran off the end: wrap around to the beginning. */
        iter->index = 0;
        iter->field_info_index = 0;
        iter->submessage_index = 0;
        iter->required_field_index = 0;
        return;
    }

    /* The other indexes advance according to the field being left behind.
     * Every field info format shares this layout in its first word:
     * - lowest 2 bits give the descriptor length as a power of two (2^n words)
     * - bits 2..7 give the lowest bits of the tag number
     * - bits 8..15 give the field type
     */
    left_behind = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);
    left_type = (pb_type_t)((left_behind >> 8) & 0xFF);
    word_count = (pb_size_t)(1 << (left_behind & 3));

    /* The casts to pb_size_t avoid -Wconversion warnings. Because the data
     * is constants from the generator, there is no danger of overflow.
     */
    iter->field_info_index = (pb_size_t)(iter->field_info_index + word_count);
    iter->required_field_index = (pb_size_t)(iter->required_field_index + (PB_HTYPE(left_type) == PB_HTYPE_REQUIRED));
    iter->submessage_index = (pb_size_t)(iter->submessage_index + PB_LTYPE_IS_SUBMSG(left_type));
}
155 
/* Initialise iter to walk the fields described by desc, with field
 * pointers resolved against message (which may be NULL).
 *
 * The iterator is zeroed first, so it starts at the first field.
 * Returns the result of loading the first field's descriptor, which is
 * false when the descriptor has no fields.
 */
bool pb_field_iter_begin(pb_field_iter_t *iter, const pb_msgdesc_t *desc, void *message)
{
    bool has_first_field;

    memset(iter, 0, sizeof(pb_field_iter_t));
    iter->message = message;
    iter->descriptor = desc;

    has_first_field = load_descriptor_values(iter);
    return has_first_field;
}
165 
/* Initialise iter over the single-field descriptor attached to an
 * extension (taken from extension->type->arg).
 *
 * After initialisation iter->pSize is pointed at extension->found,
 * regardless of the returned status. Returns the status reported by
 * pb_field_iter_begin().
 */
bool pb_field_iter_begin_extension(pb_field_iter_t *iter, pb_extension_t *extension)
{
    const pb_msgdesc_t *ext_desc = (const pb_msgdesc_t*)extension->type->arg;
    uint32_t first_word = PB_PROGMEM_READU32(ext_desc->field_info[0]);
    void *target;
    bool ok;

    if (PB_ATYPE(first_word >> 8) == PB_ATYPE_POINTER)
    {
        /* For pointer extensions, the pointer is stored directly
         * in the extension structure. This avoids having an extra
         * indirection. */
        target = &extension->dest;
    }
    else
    {
        target = extension->dest;
    }

    ok = pb_field_iter_begin(iter, ext_desc, target);
    iter->pSize = &extension->found;

    return ok;
}
187 
/* Move iter to the next field and load its descriptor.
 *
 * Returns false when the iterator wrapped around and is positioned at the
 * first field again (index 0), true otherwise.
 */
bool pb_field_iter_next(pb_field_iter_t *iter)
{
    bool wrapped;

    advance_iterator(iter);
    (void)load_descriptor_values(iter);

    wrapped = (iter->index == 0);
    return !wrapped;
}
194 
/* Position iter on the non-extension field whose tag number equals tag.
 *
 * Returns true when the iterator already is on that tag or a matching
 * field was found. Returns false when tag exceeds the descriptor's
 * largest_tag or when a full cycle through the fields found no match; in
 * the latter case the iterator ends up back at its starting field.
 */
bool pb_field_iter_find(pb_field_iter_t *iter, uint32_t tag)
{
    pb_size_t origin;
    uint32_t wanted_low_bits;

    if (iter->tag == tag)
    {
        /* Already positioned on the requested field. */
        return true;
    }

    if (tag > iter->descriptor->largest_tag)
    {
        return false;
    }

    origin = iter->index;
    wanted_low_bits = tag & 0x3F;

    if (tag < iter->tag)
    {
        /* Fields are in tag number order, so the wanted tag lies between
         * the first field and the starting position. Putting index at the
         * end makes the first advance_iterator() call wrap to the start. */
        iter->index = iter->descriptor->field_count;
    }

    for (;;)
    {
        uint32_t head;

        /* Step forward without decoding the whole descriptor yet. */
        advance_iterator(iter);

        /* Cheap pre-filter: compare only the six low tag bits that are
         * present in the first descriptor word of every format. */
        head = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);

        if (((head >> 2) & 0x3F) == wanted_low_bits)
        {
            /* Candidate: decode fully and compare the complete tag. */
            (void)load_descriptor_values(iter);

            if (iter->tag == tag &&
                PB_LTYPE(iter->type) != PB_LTYPE_EXTENSION)
            {
                return true;
            }
        }

        if (iter->index == origin)
        {
            break;
        }
    }

    /* Went all the way around without a match; reload the starting field. */
    (void)load_descriptor_values(iter);
    return false;
}
245 
/* Position iter on a field whose logical type is PB_LTYPE_EXTENSION.
 *
 * Returns true immediately when the current field already is one.
 * Otherwise the fields are scanned cyclically from the next position; on
 * a hit the result of loading that field's descriptor is returned. When a
 * full cycle finds nothing, the starting field is reloaded and false is
 * returned.
 */
bool pb_field_iter_find_extension(pb_field_iter_t *iter)
{
    pb_size_t origin;

    if (PB_LTYPE(iter->type) == PB_LTYPE_EXTENSION)
    {
        return true;
    }

    origin = iter->index;

    for (;;)
    {
        uint32_t head;

        /* Step forward without decoding the whole descriptor yet. */
        advance_iterator(iter);

        /* The field type can be checked from the first descriptor word. */
        head = PB_PROGMEM_READU32(iter->descriptor->field_info[iter->field_info_index]);

        if (PB_LTYPE((head >> 8) & 0xFF) == PB_LTYPE_EXTENSION)
        {
            return load_descriptor_values(iter);
        }

        if (iter->index == origin)
        {
            break;
        }
    }

    /* Went all the way around without a match; reload the starting field. */
    (void)load_descriptor_values(iter);
    return false;
}
276 
/* Return p with its const qualifier removed.
 *
 * This lets the common field iterator logic serve both encoding and
 * decoding. The conversion goes through a union instead of a cast to
 * avoid spurious compiler warnings.
 */
static void *pb_const_cast(const void *p)
{
    union {
        const void *as_const;
        void *as_mutable;
    } pun;

    pun.as_const = p;
    return pun.as_mutable;
}
289 
/* Same as pb_field_iter_begin(), but accepts a pointer to a const message.
 * The const qualifier is removed before the iterator is initialised.
 */
bool pb_field_iter_begin_const(pb_field_iter_t *iter, const pb_msgdesc_t *desc, const void *message)
{
    void *unqualified = pb_const_cast(message);

    return pb_field_iter_begin(iter, desc, unqualified);
}
294 
/* Same as pb_field_iter_begin_extension(), but accepts a pointer to a
 * const extension. The const qualifier is removed before the iterator is
 * initialised.
 */
bool pb_field_iter_begin_extension_const(pb_field_iter_t *iter, const pb_extension_t *extension)
{
    pb_extension_t *unqualified = (pb_extension_t*)pb_const_cast(extension);

    return pb_field_iter_begin_extension(iter, unqualified);
}
299 
/* Default handler for callback fields.
 *
 * When the field's data_size equals sizeof(pb_callback_t) and its pData
 * is non-NULL, pData is treated as a pb_callback_t. If istream is given
 * and a decode function is set, that function's result is returned;
 * otherwise, if ostream is given and an encode function is set, that
 * function's result is returned. In every other situation nothing is
 * done and true is returned.
 */
bool pb_default_field_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_t *field)
{
    pb_callback_t *cb;

    if (field->data_size != sizeof(pb_callback_t))
    {
        return true; /* Not a pb_callback_t sized field, nothing to do */
    }

    cb = (pb_callback_t*)field->pData;
    if (cb == NULL)
    {
        return true; /* No callback structure available */
    }

    /* Decoding takes precedence when both streams are supplied. */
    if (istream != NULL && cb->funcs.decode != NULL)
    {
        return cb->funcs.decode(istream, field, &cb->arg);
    }

    if (ostream != NULL && cb->funcs.encode != NULL)
    {
        return cb->funcs.encode(ostream, field, &cb->arg);
    }

    return true; /* Success, but didn't do anything */
}
323 
324 #ifdef PB_VALIDATE_UTF8
325 
326 /* This function checks whether a string is valid UTF-8 text.
327  *
328  * Algorithm is adapted from https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
329  * Original copyright: Markus Kuhn <http://www.cl.cam.ac.uk/~mgk25/> 2005-03-30
330  * Licensed under "Short code license", which allows use under MIT license or
331  * any compatible with it.
332  */
333 
/* Returns true when the NUL-terminated string str consists only of
 * sequences this checker accepts, false at the first rejected sequence.
 *
 * Continuation bytes are tested one at a time, in order, so a string that
 * ends in the middle of a sequence is rejected at its terminator and no
 * byte beyond the terminator is ever read.
 */
bool pb_validate_utf8(const char *str)
{
    /* Bytes are inspected as unsigned values so that the comparisons
     * below do not depend on the signedness of plain char. */
    const unsigned char *p = (const unsigned char*)str;

    while (*p != 0)
    {
        const unsigned char lead = p[0];

        if (lead < 0x80)
        {
            /* 0xxxxxxx */
            p += 1;
        }
        else if ((lead & 0xe0) == 0xc0)
        {
            /* 110XXXXx 10xxxxxx */
            if ((p[1] & 0xc0) != 0x80)
            {
                return false;
            }
            if ((lead & 0xfe) == 0xc0)
            {
                return false; /* overlong */
            }
            p += 2;
        }
        else if ((lead & 0xf0) == 0xe0)
        {
            /* 1110XXXX 10Xxxxxx 10xxxxxx */
            if ((p[1] & 0xc0) != 0x80)
            {
                return false;
            }
            if ((p[2] & 0xc0) != 0x80)
            {
                return false;
            }
            if (lead == 0xe0 && (p[1] & 0xe0) == 0x80)
            {
                return false; /* overlong */
            }
            if (lead == 0xed && (p[1] & 0xe0) == 0xa0)
            {
                return false; /* surrogate */
            }
            if (lead == 0xef && p[1] == 0xbf && (p[2] & 0xfe) == 0xbe)
            {
                return false; /* U+FFFE or U+FFFF */
            }
            p += 3;
        }
        else if ((lead & 0xf8) == 0xf0)
        {
            /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
            if ((p[1] & 0xc0) != 0x80)
            {
                return false;
            }
            if ((p[2] & 0xc0) != 0x80)
            {
                return false;
            }
            if ((p[3] & 0xc0) != 0x80)
            {
                return false;
            }
            if (lead == 0xf0 && (p[1] & 0xf0) == 0x80)
            {
                return false; /* overlong */
            }
            if ((lead == 0xf4 && p[1] > 0x8f) || lead > 0xf4)
            {
                return false; /* > U+10FFFF */
            }
            p += 4;
        }
        else
        {
            /* Stray continuation byte or invalid lead byte. */
            return false;
        }
    }

    return true;
}
386 
387 #endif
388 
389