1 /*
2  * upb::pb::TextPrinter
3  *
4  * OPT: This is not optimized at all.  It uses printf() which parses the format
5  * string every time, and it allocates memory for every put.
6  */
7 
8 #include "upb/pb/textprinter.h"
9 
10 #include <ctype.h>
11 #include <float.h>
12 #include <inttypes.h>
13 #include <stdarg.h>
14 #include <stdio.h>
15 #include <string.h>
16 
17 #include "upb/sink.h"
18 
19 #include "upb/port_def.inc"
20 
21 struct upb_textprinter {
22   upb_sink input_;
23   upb_bytessink output_;
24   int indent_depth_;
25   bool single_line_;
26   void *subc;
27 };
28 
/* Jumps to the enclosing function's `err` label when the expression x
 * evaluates to a negative value.  The do/while(0) wrapper makes `CHECK(x);`
 * a single statement, so it stays correct inside unbraced if/else bodies. */
#define CHECK(x) do { if ((x) < 0) goto err; } while (0)
30 
/* Returns the part of longname that follows its last '.', or longname itself
 * when it contains no '.'.  The result points into the caller's string. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
35 
/* Writes two spaces per nesting level to the output sink, unless the printer
 * is in single-line mode, in which case nothing is written.  Always returns
 * 0; the sink's return value is not inspected. */
static int indent(upb_textprinter *p) {
  int remaining;
  if (p->single_line_) {
    return 0;
  }
  for (remaining = p->indent_depth_; remaining > 0; remaining--) {
    upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
  }
  return 0;
}
43 
/* Writes the field terminator to the output sink: a space in single-line
 * mode, a newline otherwise.  Always returns 0; the sink's return value is
 * not inspected. */
static int endfield(upb_textprinter *p) {
  char terminator = '\n';
  if (p->single_line_) {
    terminator = ' ';
  }
  upb_bytessink_putbuf(p->output_, p->subc, &terminator, 1, NULL);
  return 0;
}
49 
/* Writes buf[0..len) to the output sink with C-style escaping.
 *
 * Newline, carriage return, tab, both quote characters and backslash are
 * written as two-character backslash escapes.  Any other byte that is not
 * printable is written as a three-digit octal escape (\NNN).  When
 * preserve_utf8 is true, bytes >= 0x80 are passed through unescaped.
 *
 * Output is staged in a fixed stack buffer that is flushed to the sink
 * whenever fewer than 4 bytes (the longest escape) remain, and once more at
 * the end.  Always returns 0; the sink's return value is not inspected.
 *
 * Based on CEscapeInternal() from Google's protobuf release. */
static int putescaped(upb_textprinter *p, const char *buf, size_t len,
                      bool preserve_utf8) {
  static const char hex_digits[] = "0123456789abcdef";
  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
  const char *end = buf + len;

  /* I think hex is prettier and more useful, but proto2 uses octal; should
   * investigate whether it can parse hex also. */
  const bool use_hex = false;
  bool last_hex_escape = false; /* true if last output char was \xNN */

  for (; buf < end; buf++) {
    /* Work on the byte as unsigned: <ctype.h> functions have undefined
     * behavior for negative arguments other than EOF. */
    const unsigned char ch = (unsigned char)*buf;
    bool is_hex_escape = false;

    /* Every branch below writes at most 4 bytes and no terminator, so 4 free
     * bytes are always sufficient. */
    if (dstend - dst < 4) {
      upb_bytessink_putbuf(p->output_, p->subc, dstbuf,
                           (size_t)(dst - dstbuf), NULL);
      dst = dstbuf;
    }

    switch (ch) {
      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
      default:
        /* Note that if we emit \xNN and the buf character after that is a hex
         * digit then that digit must be escaped too to prevent it being
         * interpreted as part of the character code by C. */
        if ((!preserve_utf8 || ch < 0x80) &&
            (!isprint(ch) || (last_hex_escape && isxdigit(ch)))) {
          /* Digits are emitted by hand instead of via sprintf(), which would
           * also store a NUL terminator as a fifth byte and could overrun
           * dstbuf when exactly 4 bytes are free. */
          *(dst++) = '\\';
          if (use_hex) {
            *(dst++) = 'x';
            *(dst++) = hex_digits[ch >> 4];
            *(dst++) = hex_digits[ch & 0xf];
          } else {
            *(dst++) = (char)('0' + (ch >> 6));
            *(dst++) = (char)('0' + ((ch >> 3) & 7));
            *(dst++) = (char)('0' + (ch & 7));
          }
          is_hex_escape = use_hex;
        } else {
          *(dst++) = (char)ch;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }
  /* Flush remaining data. */
  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, (size_t)(dst - dstbuf),
                       NULL);
  return 0;
}
96 
/* Formats printf-style arguments into a temporary heap buffer and writes the
 * result to the printer's output sink.
 *
 * The format is evaluated twice: once to measure the output length and once
 * to produce it.  Returns false if the length cannot be determined or the
 * buffer cannot be allocated; otherwise returns the sink's putbuf result
 * converted to bool.
 *
 * NOTE(review): this function has external linkage although only callers in
 * this file are visible here; confirm there are no outside users before
 * making it static. */
bool putf(upb_textprinter *p, const char *fmt, ...) {
  va_list args;
  va_list args_copy;
  char *str;
  int written;
  int len;
  bool ok;

  va_start(args, fmt);

  /* Run once to get the length of the string. */
  _upb_va_copy(args_copy, args);
  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
  va_end(args_copy);

  /* A negative length signals a formatting error; bail out before it is used
   * as an allocation size. */
  if (len < 0) {
    va_end(args);
    return false;
  }

  /* + 1 for the NUL terminator (the formatter writes it even if we don't
   * need it). */
  str = upb_gmalloc((size_t)len + 1);
  if (!str) {
    /* Every va_start() must be paired with va_end() before returning. */
    va_end(args);
    return false;
  }
  written = _upb_vsnprintf(str, (size_t)len + 1, fmt, args);
  va_end(args);
  UPB_ASSERT(written == len);
  (void)written;

  ok = upb_bytessink_putbuf(p->output_, p->subc, str, (size_t)len, NULL);
  upb_gfree(str);
  return ok;
}
123 
124 
125 /* handlers *******************************************************************/
126 
textprinter_startmsg(void * c,const void * hd)127 static bool textprinter_startmsg(void *c, const void *hd) {
128   upb_textprinter *p = c;
129   UPB_UNUSED(hd);
130   if (p->indent_depth_ == 0) {
131     upb_bytessink_start(p->output_, 0, &p->subc);
132   }
133   return true;
134 }
135 
/* End-of-message handler.  Ends the output byte sink when the top-level
 * message (nesting depth 0) finishes; nested messages do nothing here.  The
 * status argument is unused.  Always returns true. */
static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
  upb_textprinter *printer = c;
  const bool is_top_level = (printer->indent_depth_ == 0);
  UPB_UNUSED(hd);
  UPB_UNUSED(s);
  if (is_top_level) {
    upb_bytessink_end(printer->output_);
  }
  return true;
}
145 
/* Defines textprinter_put<name>(), a scalar value handler that prints
 * "<field name>: <value>" using printf conversion `fmt`, preceded by
 * indentation and followed by the field terminator.  The closure is the
 * printer and the handler data is the field's upb_fielddef.  The generated
 * function returns false if indentation, formatting/writing the value, or the
 * field terminator reports failure, and true otherwise. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    if (!putf(p, "%s: " fmt, upb_fielddef_name(f), val)) goto err;             \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
158 
textprinter_putbool(void * closure,const void * handler_data,bool val)159 static bool textprinter_putbool(void *closure, const void *handler_data,
160                                 bool val) {
161   upb_textprinter *p = closure;
162   const upb_fielddef *f = handler_data;
163   CHECK(indent(p));
164   putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
165   CHECK(endfield(p));
166   return true;
167 err:
168   return false;
169 }
170 
171 #define STRINGIFY_HELPER(x) #x
172 #define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)
173 
174 TYPE(int32,  int32_t,  "%" PRId32)
175 TYPE(int64,  int64_t,  "%" PRId64)
176 TYPE(uint32, uint32_t, "%" PRIu32)
177 TYPE(uint64, uint64_t, "%" PRIu64)
STRINGIFY_MACROVAL(FLT_DIG)178 TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
179 TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")
180 
181 #undef TYPE
182 
183 /* Output a symbolic value from the enum if found, else just print as int32. */
184 static bool textprinter_putenum(void *closure, const void *handler_data,
185                                 int32_t val) {
186   upb_textprinter *p = closure;
187   const upb_fielddef *f = handler_data;
188   const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f);
189   const char *label = upb_enumdef_iton(enum_def, val);
190   if (label) {
191     indent(p);
192     putf(p, "%s: %s", upb_fielddef_name(f), label);
193     endfield(p);
194   } else {
195     if (!textprinter_putint32(closure, handler_data, val))
196       return false;
197   }
198   return true;
199 }
200 
textprinter_startstr(void * closure,const void * handler_data,size_t size_hint)201 static void *textprinter_startstr(void *closure, const void *handler_data,
202                       size_t size_hint) {
203   upb_textprinter *p = closure;
204   const upb_fielddef *f = handler_data;
205   UPB_UNUSED(size_hint);
206   indent(p);
207   putf(p, "%s: \"", upb_fielddef_name(f));
208   return p;
209 }
210 
textprinter_endstr(void * closure,const void * handler_data)211 static bool textprinter_endstr(void *closure, const void *handler_data) {
212   upb_textprinter *p = closure;
213   UPB_UNUSED(handler_data);
214   putf(p, "\"");
215   endfield(p);
216   return true;
217 }
218 
textprinter_putstr(void * closure,const void * hd,const char * buf,size_t len,const upb_bufhandle * handle)219 static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
220                                  size_t len, const upb_bufhandle *handle) {
221   upb_textprinter *p = closure;
222   const upb_fielddef *f = hd;
223   UPB_UNUSED(handle);
224   CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
225   return len;
226 err:
227   return 0;
228 }
229 
textprinter_startsubmsg(void * closure,const void * handler_data)230 static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
231   upb_textprinter *p = closure;
232   const char *name = handler_data;
233   CHECK(indent(p));
234   putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
235   p->indent_depth_++;
236   return p;
237 err:
238   return UPB_BREAK;
239 }
240 
textprinter_endsubmsg(void * closure,const void * handler_data)241 static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
242   upb_textprinter *p = closure;
243   UPB_UNUSED(handler_data);
244   p->indent_depth_--;
245   CHECK(indent(p));
246   upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
247   CHECK(endfield(p));
248   return true;
249 err:
250   return false;
251 }
252 
/* Handler-registration callback (passed to upb_handlercache_new() by
 * upb_textprinter_newcache()).  Installs the start/end message handlers and
 * then, for every field of the message, the text-printing handler that
 * matches the field's type.  Scalar, string and enum handlers receive the
 * field definition as handler data; submessage handlers receive the name to
 * print instead.  The first argument is unused. */
static void onmreg(const void *c, upb_handlers *h) {
  const upb_msgdef *msg = upb_handlers_msgdef(h);
  upb_msg_field_iter it;
  UPB_UNUSED(c);

  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);

  upb_msg_field_begin(&it, msg);
  while (!upb_msg_field_done(&it)) {
    upb_fielddef *f = upb_msg_iter_field(&it);
    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
    attr.handler_data = f;

    switch (upb_fielddef_type(f)) {
      case UPB_TYPE_INT32:
        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
        break;
      case UPB_TYPE_INT64:
        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
        break;
      case UPB_TYPE_UINT32:
        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
        break;
      case UPB_TYPE_UINT64:
        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
        break;
      case UPB_TYPE_FLOAT:
        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
        break;
      case UPB_TYPE_DOUBLE:
        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
        break;
      case UPB_TYPE_BOOL:
        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
        break;
      case UPB_TYPE_STRING:
      case UPB_TYPE_BYTES:
        /* Strings and bytes share the same start/data/end handlers. */
        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
        break;
      case UPB_TYPE_MESSAGE: {
        /* Groups are printed under the short name of their message type;
         * ordinary submessages under the field name. */
        const char *name;
        if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP) {
          name = shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)));
        } else {
          name = upb_fielddef_name(f);
        }
        attr.handler_data = name;
        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
        break;
      }
      case UPB_TYPE_ENUM:
        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
        break;
    }

    upb_msg_field_next(&it);
  }
}
311 
/* Puts the printer back at nesting depth zero and selects single-line or
 * multi-line output. */
static void textprinter_reset(upb_textprinter *p, bool single_line) {
  p->indent_depth_ = 0;
  p->single_line_ = single_line;
}
316 
317 
318 /* Public API *****************************************************************/
319 
upb_textprinter_create(upb_arena * arena,const upb_handlers * h,upb_bytessink output)320 upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h,
321                                         upb_bytessink output) {
322   upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter));
323   if (!p) return NULL;
324 
325   p->output_ = output;
326   upb_sink_reset(&p->input_, h, p);
327   textprinter_reset(p, false);
328 
329   return p;
330 }
331 
upb_textprinter_newcache(void)332 upb_handlercache *upb_textprinter_newcache(void) {
333   return upb_handlercache_new(&onmreg, NULL);
334 }
335 
upb_textprinter_input(upb_textprinter * p)336 upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; }
337 
/* Switches the printer between single-line output (true) and multi-line,
 * indented output (false).  The nesting depth is left untouched. */
void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
  (*p).single_line_ = single_line;
}
341