1 /* gtkbuilderparser.c
2  * Copyright (C) 2019 Red Hat,
3  *                         Alexander Larsson <alexander.larsson@redhat.com>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
17  */
18 
19 #include "config.h"
20 
21 #include <gio/gio.h>
22 #include "gtkbuilderprivate.h"
23 #include "gtkbuilder.h"
24 #include "gtkbuildableprivate.h"
25 
26 /*****************************************  Record a GMarkup parser call ***************************/
27 
28 typedef enum
29 {
30  RECORD_TYPE_ELEMENT,
31  RECORD_TYPE_END_ELEMENT,
32  RECORD_TYPE_TEXT,
33 } RecordTreeType;
34 
35 typedef struct RecordDataTree RecordDataTree;
36 
37 /* All strings are owned by the string table */
38 struct RecordDataTree {
39   RecordDataTree *parent;
40   RecordTreeType type;
41   int n_attributes;
42   const char *data;
43   const char **attributes;
44   const char **values;
45   GList *children;
46 };
47 
48 typedef struct {
49   char *string;
50   int count;
51   int offset;
52 } RecordDataString;
53 
54 static RecordDataTree *
record_data_tree_new(RecordDataTree * parent,RecordTreeType type,const char * data)55 record_data_tree_new (RecordDataTree *parent, RecordTreeType type,  const char *data)
56 {
57   RecordDataTree *tree = g_slice_new0 (RecordDataTree);
58 
59   tree->parent = parent;
60   tree->type = type;
61   tree->data = data;
62 
63   if (parent)
64     parent->children = g_list_prepend  (parent->children, tree);
65 
66   return tree;
67 }
68 
69 static void
record_data_tree_free(RecordDataTree * tree)70 record_data_tree_free (RecordDataTree *tree)
71 {
72   g_list_free_full (tree->children, (GDestroyNotify)record_data_tree_free);
73   g_free (tree->attributes);
74   g_free (tree->values);
75   g_slice_free (RecordDataTree, tree);
76 }
77 
78 static void
record_data_string_free(RecordDataString * s)79 record_data_string_free (RecordDataString *s)
80 {
81   g_free (s->string);
82   g_slice_free (RecordDataString, s);
83 }
84 
85 static const char *
record_data_string_lookup(GHashTable * strings,const char * str,gssize len)86 record_data_string_lookup (GHashTable *strings, const char *str, gssize len)
87 {
88   char *copy = NULL;
89   RecordDataString *s;
90 
91   if (len >= 0)
92     {
93       /* Ensure str is zero terminated */
94       copy = g_strndup (str, len);
95       str = copy;
96     }
97 
98   s = g_hash_table_lookup (strings, str);
99   if (s)
100     {
101       g_free (copy);
102       s->count++;
103       return s->string;
104     }
105 
106   s = g_slice_new (RecordDataString);
107   s->string = copy ? copy : g_strdup (str);
108   s->count = 1;
109 
110   g_hash_table_insert (strings, s->string, s);
111   return s->string;
112 }
113 
114 typedef struct {
115   GHashTable *strings;
116   RecordDataTree *root;
117   RecordDataTree *current;
118 } RecordData;
119 
120 static void
record_start_element(GMarkupParseContext * context,const char * element_name,const char ** names,const char ** values,gpointer user_data,GError ** error)121 record_start_element (GMarkupParseContext  *context,
122                       const char           *element_name,
123                       const char          **names,
124                       const char          **values,
125                       gpointer              user_data,
126                       GError              **error)
127 {
128   gsize n_attrs = g_strv_length ((char **)names);
129   RecordData *data = user_data;
130   RecordDataTree *child;
131   int i;
132 
133   child = record_data_tree_new (data->current, RECORD_TYPE_ELEMENT,
134                                 record_data_string_lookup (data->strings, element_name, -1));
135   data->current = child;
136 
137   child->n_attributes = n_attrs;
138   child->attributes = g_new (const char *, n_attrs);
139   child->values = g_new (const char *, n_attrs);
140 
141   for (i = 0; i < n_attrs; i++)
142     {
143       child->attributes[i] = record_data_string_lookup (data->strings, names[i], -1);
144       child->values[i] = record_data_string_lookup (data->strings, values[i], -1);
145     }
146 }
147 
148 static void
record_end_element(GMarkupParseContext * context,const char * element_name,gpointer user_data,GError ** error)149 record_end_element (GMarkupParseContext  *context,
150                     const char           *element_name,
151                     gpointer              user_data,
152                     GError              **error)
153 {
154   RecordData *data = user_data;
155 
156   data->current = data->current->parent;
157 }
158 
159 static void
record_text(GMarkupParseContext * context,const char * text,gsize text_len,gpointer user_data,GError ** error)160 record_text (GMarkupParseContext  *context,
161              const char           *text,
162              gsize                 text_len,
163              gpointer              user_data,
164              GError              **error)
165 {
166   RecordData *data = user_data;
167 
168   record_data_tree_new (data->current, RECORD_TYPE_TEXT,
169                         record_data_string_lookup (data->strings, text, text_len));
170 }
171 
172 static const GMarkupParser record_parser =
173 {
174   record_start_element,
175   record_end_element,
176   record_text,
177   NULL, // passthrough, not stored
178   NULL, // error, fails immediately
179 };
180 
181 static int
compare_string(gconstpointer _a,gconstpointer _b)182 compare_string (gconstpointer _a,
183                 gconstpointer _b)
184 {
185   const RecordDataString *a = _a;
186   const RecordDataString *b = _b;
187 
188   return b->count - a->count;
189 }
190 
191 static void
marshal_uint32(GString * str,guint32 v)192 marshal_uint32 (GString *str,
193                 guint32 v)
194 {
195   /*
196     We encode in a variable length format similar to
197     utf8:
198 
199     v size      byte 1    byte 2    byte 3   byte 4  byte 5
200     7 bit:    0xxxxxxx
201     14 bit:   10xxxxxx  xxxxxxxx
202     21 bit:   110xxxxx  xxxxxxxx  xxxxxxxx
203     28 bit:   1110xxxx  xxxxxxxx  xxxxxxxx xxxxxxxx
204     32 bit:   11110000  xxxxxxxx  xxxxxxxx xxxxxxxx xxxxxxx
205   */
206 
207   if (v < 128)
208     {
209       g_string_append_c (str, (guchar)v);
210     }
211   else if (v < (1<<14))
212     {
213       g_string_append_c (str, (guchar)(v >> 8) | 0x80);
214       g_string_append_c (str, (guchar)(v & 0xff));
215     }
216   else if (v < (1<<21))
217     {
218       g_string_append_c (str, (guchar)(v >> 16) | 0xc0);
219       g_string_append_c (str, (guchar)((v >> 8) & 0xff));
220       g_string_append_c (str, (guchar)(v & 0xff));
221     }
222   else if (v < (1<<28))
223     {
224       g_string_append_c (str, (guchar)(v >> 24) | 0xe0);
225       g_string_append_c (str, (guchar)((v >> 16) & 0xff));
226       g_string_append_c (str, (guchar)((v >> 8) & 0xff));
227       g_string_append_c (str, (guchar)(v & 0xff));
228     }
229   else
230     {
231       g_string_append_c (str, 0xf0);
232       g_string_append_c (str, (guchar)((v >> 24) & 0xff));
233       g_string_append_c (str, (guchar)((v >> 16) & 0xff));
234       g_string_append_c (str, (guchar)((v >> 8) & 0xff));
235       g_string_append_c (str, (guchar)(v & 0xff));
236     }
237 }
238 
239 static void
marshal_string(GString * marshaled,GHashTable * strings,const char * string)240 marshal_string (GString *marshaled,
241                 GHashTable *strings,
242                 const char *string)
243 {
244   RecordDataString *s;
245 
246   s = g_hash_table_lookup (strings, string);
247   g_assert (s != NULL);
248 
249   marshal_uint32 (marshaled, s->offset);
250 }
251 
252 static void
marshal_tree(GString * marshaled,GHashTable * strings,RecordDataTree * tree)253 marshal_tree (GString *marshaled,
254               GHashTable *strings,
255               RecordDataTree *tree)
256 {
257   GList *l;
258   int i;
259 
260   /* Special case the root */
261   if (tree->parent == NULL)
262     {
263       for (l = g_list_last (tree->children); l != NULL; l = l->prev)
264         marshal_tree (marshaled, strings, l->data);
265       return;
266     }
267 
268   switch (tree->type)
269     {
270     case RECORD_TYPE_ELEMENT:
271       marshal_uint32 (marshaled, RECORD_TYPE_ELEMENT);
272       marshal_string (marshaled, strings, tree->data);
273       marshal_uint32 (marshaled, tree->n_attributes);
274       for (i = 0; i < tree->n_attributes; i++)
275         {
276           marshal_string (marshaled, strings, tree->attributes[i]);
277           marshal_string (marshaled, strings, tree->values[i]);
278         }
279       for (l = g_list_last (tree->children); l != NULL; l = l->prev)
280         marshal_tree (marshaled, strings, l->data);
281 
282       marshal_uint32 (marshaled, RECORD_TYPE_END_ELEMENT);
283       break;
284     case RECORD_TYPE_TEXT:
285       marshal_uint32 (marshaled, RECORD_TYPE_TEXT);
286       marshal_string (marshaled, strings, tree->data);
287       break;
288     case RECORD_TYPE_END_ELEMENT:
289     default:
290       g_assert_not_reached ();
291     }
292 }
293 
294 /**
295  * _gtk_buildable_parser_precompile:
296  * @text: chunk of text to parse
297  * @text_len: length of @text in bytes
298  *
299  * Converts the xml format typically used by GtkBuilder to a
300  * binary form that is more efficient to parse. This is a custom
301  * format that is only supported by GtkBuilder.
302  *
303  * returns: A `GBytes` with the precompiled data
304  **/
305 GBytes *
_gtk_buildable_parser_precompile(const char * text,gssize text_len,GError ** error)306 _gtk_buildable_parser_precompile (const char          *text,
307                                   gssize               text_len,
308                                   GError             **error)
309 {
310   GMarkupParseContext *ctx;
311   RecordData data = { 0 };
312   GList *string_table, *l;
313   GString *marshaled;
314   int offset;
315 
316   data.strings = g_hash_table_new_full (g_str_hash, g_str_equal, NULL, (GDestroyNotify)record_data_string_free);
317   data.root = record_data_tree_new (NULL, RECORD_TYPE_ELEMENT, NULL);
318   data.current = data.root;
319 
320   ctx = g_markup_parse_context_new (&record_parser, G_MARKUP_TREAT_CDATA_AS_TEXT,
321                                     &data, NULL);
322 
323   if (!g_markup_parse_context_parse (ctx, text, text_len, error))
324     {
325       record_data_tree_free (data.root);
326       g_hash_table_destroy (data.strings);
327       g_markup_parse_context_free (ctx);
328       return NULL;
329     }
330 
331   g_markup_parse_context_free (ctx);
332 
333   string_table = g_hash_table_get_values (data.strings);
334 
335   string_table = g_list_sort (string_table, compare_string);
336 
337   offset = 0;
338   for (l = string_table; l != NULL; l = l->next)
339     {
340       RecordDataString *s = l->data;
341       s->offset = offset;
342       offset += strlen (s->string) + 1;
343     }
344 
345   marshaled = g_string_new ("");
346   /* Magic marker */
347   g_string_append_len (marshaled, "GBU\0", 4);
348   marshal_uint32 (marshaled, offset);
349 
350   for (l = string_table; l != NULL; l = l->next)
351     {
352       RecordDataString *s = l->data;
353       g_string_append_len (marshaled, s->string, strlen (s->string) + 1);
354     }
355 
356   g_list_free (string_table);
357 
358   marshal_tree (marshaled, data.strings, data.root);
359 
360   record_data_tree_free (data.root);
361   g_hash_table_destroy (data.strings);
362 
363   return g_string_free_to_bytes (marshaled);
364 }
365 
366 /*****************************************  Replay GMarkup parser callbacks ***************************/
367 
368 static guint32
demarshal_uint32(const char ** tree)369 demarshal_uint32 (const char **tree)
370 {
371   const guchar *p = (const guchar *)*tree;
372   guchar c = *p;
373   /* see marshal_uint32 for format */
374 
375   if (c < 128) /* 7 bit */
376     {
377       *tree += 1;
378       return c;
379     }
380   else if ((c & 0xc0) == 0x80) /* 14 bit */
381     {
382       *tree += 2;
383       return (c & 0x3f) << 8 | p[1];
384     }
385   else if ((c & 0xe0) == 0xc0) /* 21 bit */
386     {
387       *tree += 3;
388       return (c & 0x1f) << 16 | p[1] << 8 | p[2];
389     }
390   else if ((c & 0xf0) == 0xe0) /* 28 bit */
391     {
392       *tree += 4;
393       return (c & 0xf) << 24 | p[1] << 16 | p[2] << 8 | p[3];
394     }
395   else
396     {
397       *tree += 5;
398       return p[1] << 24 | p[2] << 16 | p[3] << 8 | p[4];
399     }
400 }
401 
402 static const char *
demarshal_string(const char ** tree,const char * strings)403 demarshal_string (const char **tree, const char *strings)
404 {
405   guint32 offset = demarshal_uint32 (tree);
406 
407   return strings + offset;
408 }
409 
410 static void
propagate_error(GtkBuildableParseContext * context,GError ** dest,GError * src)411 propagate_error (GtkBuildableParseContext *context,
412                  GError                  **dest,
413                  GError                   *src)
414 {
415   (*context->internal_callbacks->error) (NULL, src, context);
416   g_propagate_error (dest, src);
417 }
418 
419 static gboolean
replay_start_element(GtkBuildableParseContext * context,const char ** tree,const char * strings,GError ** error)420 replay_start_element (GtkBuildableParseContext *context,
421                       const char **tree,
422                       const char *strings,
423                       GError **error)
424 {
425   const char *element_name;
426   guint32 i, n_attrs;
427   const char **attr_names;
428   const char **attr_values;
429   GError *tmp_error = NULL;
430 
431   element_name = demarshal_string (tree, strings);
432   n_attrs = demarshal_uint32 (tree);
433 
434   attr_names = g_newa (const char *, n_attrs + 1);
435   attr_values = g_newa (const char *, n_attrs + 1);
436   for (i = 0; i < n_attrs; i++)
437     {
438       attr_names[i] = demarshal_string (tree, strings);
439       attr_values[i] = demarshal_string (tree, strings);
440     }
441   attr_names[i] = NULL;
442   attr_values[i] = NULL;
443 
444   (* context->internal_callbacks->start_element) (NULL,
445                                                   element_name,
446                                                   attr_names,
447                                                   attr_values,
448                                                   context,
449                                                   &tmp_error);
450 
451   if (tmp_error)
452     {
453       propagate_error (context, error, tmp_error);
454       return FALSE;
455     }
456 
457   return TRUE;
458 }
459 
460 static gboolean
replay_end_element(GtkBuildableParseContext * context,const char ** tree,const char * strings,GError ** error)461 replay_end_element (GtkBuildableParseContext *context,
462                     const char **tree,
463                     const char *strings,
464                     GError **error)
465 {
466   GError *tmp_error = NULL;
467 
468   (* context->internal_callbacks->end_element) (NULL,
469                                                 gtk_buildable_parse_context_get_element (context),
470                                                 context,
471                                                 &tmp_error);
472   if (tmp_error)
473     {
474       propagate_error (context, error, tmp_error);
475       return FALSE;
476     }
477 
478   return TRUE;
479 }
480 
481 static gboolean
replay_text(GtkBuildableParseContext * context,const char ** tree,const char * strings,GError ** error)482 replay_text (GtkBuildableParseContext *context,
483              const char **tree,
484              const char *strings,
485              GError **error)
486 {
487   const char *text;
488   GError *tmp_error = NULL;
489 
490   text = demarshal_string (tree, strings);
491 
492   (*context->internal_callbacks->text) (NULL,
493                                         text,
494                                         strlen (text),
495                                         context,
496                                         &tmp_error);
497 
498   if (tmp_error)
499     {
500       propagate_error (context, error, tmp_error);
501       return FALSE;
502     }
503 
504   return TRUE;
505 }
506 
507 gboolean
_gtk_buildable_parser_is_precompiled(const char * data,gssize data_len)508 _gtk_buildable_parser_is_precompiled (const char           *data,
509                                       gssize                data_len)
510 {
511   return
512     data_len > 4 &&
513     data[0] == 'G' &&
514     data[1] == 'B' &&
515     data[2] == 'U' &&
516     data[3] == 0;
517 }
518 
519 gboolean
_gtk_buildable_parser_replay_precompiled(GtkBuildableParseContext * context,const char * data,gssize data_len,GError ** error)520 _gtk_buildable_parser_replay_precompiled (GtkBuildableParseContext *context,
521                                           const char           *data,
522                                           gssize                data_len,
523                                           GError              **error)
524 {
525   const char *data_end = data + data_len;
526   guint32 type, len;
527   const char *strings;
528   const char *tree;
529 
530   data = data + 4; /* Skip header */
531 
532   len = demarshal_uint32 (&data);
533 
534   strings = data;
535   data = data + len;
536   tree = data;
537 
538   while (tree < data_end)
539     {
540       gboolean res;
541       type = demarshal_uint32 (&tree);
542 
543       switch (type)
544         {
545         case RECORD_TYPE_ELEMENT:
546           res = replay_start_element (context, &tree, strings, error);
547           break;
548         case RECORD_TYPE_END_ELEMENT:
549           res = replay_end_element (context, &tree, strings, error);
550           break;
551         case RECORD_TYPE_TEXT:
552           res = replay_text (context, &tree, strings, error);
553           break;
554         default:
555           g_assert_not_reached ();
556         }
557 
558       if (!res)
559         return FALSE;
560     }
561 
562   return TRUE;
563 }
564