1 /* Reading Desktop Entry files.
2    Copyright (C) 1995-1998, 2000-2003, 2005-2006, 2008-2009, 2014-2019 Free
3    Software Foundation, Inc.
4    This file was written by Daiki Ueno <ueno@gnu.org>.
5 
6    This program is free software: you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10 
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15 
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
18 
19 #ifdef HAVE_CONFIG_H
20 # include <config.h>
21 #endif
22 
23 /* Specification.  */
24 #include "read-desktop.h"
25 
26 #include "xalloc.h"
27 
28 #include <assert.h>
29 #include <errno.h>
30 #include <stdbool.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include "error.h"
36 #include "error-progname.h"
37 #include "xalloc.h"
38 #include "xvasprintf.h"
39 #include "c-ctype.h"
40 #include "po-lex.h"
41 #include "po-xerror.h"
42 #include "gettext.h"
43 
44 #define _(str) gettext (str)
45 
46 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
47 
48 /* The syntax of a Desktop Entry file is defined at
49    https://standards.freedesktop.org/desktop-entry-spec/latest/index.html.  */
50 
51 desktop_reader_ty *
desktop_reader_alloc(desktop_reader_class_ty * method_table)52 desktop_reader_alloc (desktop_reader_class_ty *method_table)
53 {
54   desktop_reader_ty *reader;
55 
56   reader = (desktop_reader_ty *) xmalloc (method_table->size);
57   reader->methods = method_table;
58   if (method_table->constructor)
59     method_table->constructor (reader);
60   return reader;
61 }
62 
63 void
desktop_reader_free(desktop_reader_ty * reader)64 desktop_reader_free (desktop_reader_ty *reader)
65 {
66   if (reader->methods->destructor)
67     reader->methods->destructor (reader);
68   free (reader);
69 }
70 
71 void
desktop_reader_handle_group(desktop_reader_ty * reader,const char * group)72 desktop_reader_handle_group (desktop_reader_ty *reader, const char *group)
73 {
74   if (reader->methods->handle_group)
75     reader->methods->handle_group (reader, group);
76 }
77 
78 void
desktop_reader_handle_pair(desktop_reader_ty * reader,lex_pos_ty * key_pos,const char * key,const char * locale,const char * value)79 desktop_reader_handle_pair (desktop_reader_ty *reader,
80                             lex_pos_ty *key_pos,
81                             const char *key,
82                             const char *locale,
83                             const char *value)
84 {
85   if (reader->methods->handle_pair)
86     reader->methods->handle_pair (reader, key_pos, key, locale, value);
87 }
88 
89 void
desktop_reader_handle_comment(desktop_reader_ty * reader,const char * s)90 desktop_reader_handle_comment (desktop_reader_ty *reader, const char *s)
91 {
92   if (reader->methods->handle_comment)
93     reader->methods->handle_comment (reader, s);
94 }
95 
96 void
desktop_reader_handle_blank(desktop_reader_ty * reader,const char * s)97 desktop_reader_handle_blank (desktop_reader_ty *reader, const char *s)
98 {
99   if (reader->methods->handle_blank)
100     reader->methods->handle_blank (reader, s);
101 }
102 
103 /* Real filename, used in error messages about the input file.  */
104 static const char *real_file_name;
105 
106 /* File name and line number.  */
107 extern lex_pos_ty gram_pos;
108 
109 /* The input file stream.  */
110 static FILE *fp;
111 
112 
113 static int
phase1_getc()114 phase1_getc ()
115 {
116   int c;
117 
118   c = getc (fp);
119 
120   if (c == EOF)
121     {
122       if (ferror (fp))
123         {
124           const char *errno_description = strerror (errno);
125           po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
126                      xasprintf ("%s: %s",
127                                 xasprintf (_("error while reading \"%s\""),
128                                            real_file_name),
129                                 errno_description));
130         }
131       return EOF;
132     }
133 
134   return c;
135 }
136 
137 static inline void
phase1_ungetc(int c)138 phase1_ungetc (int c)
139 {
140   if (c != EOF)
141     ungetc (c, fp);
142 }
143 
144 
145 static unsigned char phase2_pushback[2];
146 static int phase2_pushback_length;
147 
148 static int
phase2_getc()149 phase2_getc ()
150 {
151   int c;
152 
153   if (phase2_pushback_length)
154     c = phase2_pushback[--phase2_pushback_length];
155   else
156     {
157       c = phase1_getc ();
158 
159       if (c == '\r')
160         {
161           int c2 = phase1_getc ();
162           if (c2 == '\n')
163             c = c2;
164           else
165             phase1_ungetc (c2);
166         }
167     }
168 
169   if (c == '\n')
170     gram_pos.line_number++;
171 
172   return c;
173 }
174 
175 static void
phase2_ungetc(int c)176 phase2_ungetc (int c)
177 {
178   if (c == '\n')
179     --gram_pos.line_number;
180   if (c != EOF)
181     phase2_pushback[phase2_pushback_length++] = c;
182 }
183 
/* The kinds of token the lexer produces; each token covers one input
   line.  */
typedef enum token_type_ty
{
  /* End of input.  */
  token_type_eof,
  /* A group header line, starting with '['.  */
  token_type_group,
  /* A key (with optional locale) and value line.  */
  token_type_pair,
  /* Unlike other scanners, comments and blank lines are preserved, so
     that msgfmt can merge translations back into a desktop file.  */
  token_type_comment,
  token_type_blank,
  /* A line that was diagnosed and carries no data.  */
  token_type_other
} token_type_ty;
196 
197 typedef struct token_ty token_ty;
198 struct token_ty
199 {
200   token_type_ty type;
201   char *string;
202   const char *value;
203   const char *locale;
204 };
205 
206 /* Free the memory pointed to by a 'struct token_ty'.  */
207 static inline void
free_token(token_ty * tp)208 free_token (token_ty *tp)
209 {
210   if (tp->type == token_type_group || tp->type == token_type_pair
211       || tp->type == token_type_comment || tp->type == token_type_blank)
212     free (tp->string);
213 }
214 
215 static void
desktop_lex(token_ty * tp)216 desktop_lex (token_ty *tp)
217 {
218   static char *buffer;
219   static size_t bufmax;
220   size_t bufpos;
221 
222 #undef APPEND
223 #define APPEND(c)                               \
224   do                                            \
225     {                                           \
226       if (bufpos >= bufmax)                     \
227         {                                       \
228           bufmax += 100;                        \
229           buffer = xrealloc (buffer, bufmax);   \
230         }                                       \
231       buffer[bufpos++] = c;                     \
232     }                                           \
233   while (0)
234 
235   bufpos = 0;
236   for (;;)
237     {
238       int c;
239 
240       c = phase2_getc ();
241 
242       switch (c)
243         {
244         case EOF:
245           tp->type = token_type_eof;
246           return;
247 
248         case '[':
249           {
250             bool non_blank = false;
251 
252             for (;;)
253               {
254                 c = phase2_getc ();
255                 if (c == EOF || c == ']')
256                   break;
257                 if (c == '\n')
258                   {
259                     po_xerror (PO_SEVERITY_WARNING, NULL,
260                                real_file_name, gram_pos.line_number, 0, false,
261                                _("unterminated group name"));
262                     break;
263                   }
264                 /* Group names may contain all ASCII characters
265                    except for '[' and ']' and control characters.  */
266                 if (!(c_isascii (c) && c != '[' && !c_iscntrl (c)))
267                   break;
268                 APPEND (c);
269               }
270             /* Skip until newline.  */
271             while (c != '\n' && c != EOF)
272               {
273                 c = phase2_getc ();
274                 if (c == EOF)
275                   break;
276                 if (!c_isspace (c))
277                   non_blank = true;
278               }
279             if (non_blank)
280               po_xerror (PO_SEVERITY_WARNING, NULL,
281                          real_file_name, gram_pos.line_number, 0, false,
282                          _("invalid non-blank character"));
283             APPEND (0);
284             tp->type = token_type_group;
285             tp->string = xstrdup (buffer);
286             return;
287           }
288 
289         case '#':
290           {
291             /* Read until newline.  */
292             for (;;)
293               {
294                 c = phase2_getc ();
295                 if (c == EOF || c == '\n')
296                   break;
297                 APPEND (c);
298               }
299             APPEND (0);
300             tp->type = token_type_comment;
301             tp->string = xstrdup (buffer);
302             return;
303           }
304 
305         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
306         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
307         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
308         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
309         case 'Y': case 'Z':
310         case '-':
311         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
312         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
313         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
314         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
315         case 'y': case 'z':
316         case '0': case '1': case '2': case '3': case '4':
317         case '5': case '6': case '7': case '8': case '9':
318           {
319             size_t locale_start;
320             bool found_locale = false;
321             size_t value_start;
322             for (;;)
323               {
324                 APPEND (c);
325 
326                 c = phase2_getc ();
327                 switch (c)
328                   {
329                   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
330                   case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
331                   case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
332                   case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
333                   case 'Y': case 'Z':
334                   case '-':
335                   case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
336                   case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
337                   case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
338                   case 's': case 't': case 'u': case 'v': case 'w': case 'x':
339                   case 'y': case 'z':
340                   case '0': case '1': case '2': case '3': case '4':
341                   case '5': case '6': case '7': case '8': case '9':
342                     continue;
343 
344                   case '[':
345                     /* Finish the key part and start the locale part.  */
346                     APPEND (0);
347                     found_locale = true;
348                     locale_start = bufpos;
349 
350                     for (;;)
351                       {
352                         int c2 = phase2_getc ();
353                         if (c2 == EOF || c2 == ']')
354                           break;
355                         APPEND (c2);
356                       }
357                     break;
358 
359                   default:
360                     phase2_ungetc (c);
361                     break;
362                   }
363                 break;
364               }
365             APPEND (0);
366 
367             /* Skip any space before '='.  */
368             for (;;)
369               {
370                 c = phase2_getc ();
371                 switch (c)
372                   {
373                   case ' ':
374                     continue;
375                   default:
376                     phase2_ungetc (c);
377                     break;
378                   case EOF: case '\n':
379                     break;
380                   }
381                 break;
382               }
383 
384             c = phase2_getc ();
385             if (c != '=')
386               {
387                 po_xerror (PO_SEVERITY_WARNING, NULL,
388                            real_file_name, gram_pos.line_number, 0, false,
389                            xasprintf (_("missing '=' after \"%s\""), buffer));
390                 for (;;)
391                   {
392                     c = phase2_getc ();
393                     if (c == EOF || c == '\n')
394                       break;
395                   }
396                 tp->type = token_type_other;
397                 return;
398               }
399 
400             /* Skip any space after '='.  */
401             for (;;)
402               {
403                 c = phase2_getc ();
404                 switch (c)
405                   {
406                   case ' ':
407                     continue;
408                   default:
409                     phase2_ungetc (c);
410                     break;
411                   case EOF:
412                     break;
413                   }
414                 break;
415               }
416 
417             value_start = bufpos;
418             for (;;)
419               {
420                 c = phase2_getc ();
421                 if (c == EOF || c == '\n')
422                   break;
423                 APPEND (c);
424               }
425             APPEND (0);
426             tp->type = token_type_pair;
427             tp->string = xmemdup (buffer, bufpos);
428             tp->locale = found_locale ? &buffer[locale_start] : NULL;
429             tp->value = &buffer[value_start];
430             return;
431           }
432         default:
433           {
434             bool non_blank = false;
435 
436             for (;;)
437               {
438                 if (c == '\n' || c == EOF)
439                   break;
440 
441                 if (!c_isspace (c))
442                   non_blank = true;
443                 else
444                   APPEND (c);
445 
446                 c = phase2_getc ();
447               }
448             if (non_blank)
449               {
450                 po_xerror (PO_SEVERITY_WARNING, NULL,
451                            real_file_name, gram_pos.line_number, 0, false,
452                            _("invalid non-blank line"));
453                 tp->type = token_type_other;
454                 return;
455               }
456             APPEND (0);
457             tp->type = token_type_blank;
458             tp->string = xstrdup (buffer);
459             return;
460           }
461         }
462     }
463 #undef APPEND
464 }
465 
466 void
desktop_parse(desktop_reader_ty * reader,FILE * file,const char * real_filename,const char * logical_filename)467 desktop_parse (desktop_reader_ty *reader, FILE *file,
468                const char *real_filename, const char *logical_filename)
469 {
470   fp = file;
471   real_file_name = real_filename;
472   gram_pos.file_name = xstrdup (logical_filename);
473   gram_pos.line_number = 1;
474 
475   for (;;)
476     {
477       struct token_ty token;
478       desktop_lex (&token);
479       switch (token.type)
480         {
481         case token_type_eof:
482           goto out;
483         case token_type_group:
484           desktop_reader_handle_group (reader, token.string);
485           break;
486         case token_type_comment:
487           desktop_reader_handle_comment (reader, token.string);
488           break;
489         case token_type_pair:
490           desktop_reader_handle_pair (reader, &gram_pos,
491                                       token.string, token.locale, token.value);
492           break;
493         case token_type_blank:
494           desktop_reader_handle_blank (reader, token.string);
495           break;
496         case token_type_other:
497           break;
498         }
499       free_token (&token);
500     }
501 
502  out:
503   fp = NULL;
504   real_file_name = NULL;
505   gram_pos.line_number = 0;
506 }
507 
508 char *
desktop_escape_string(const char * s,bool is_list)509 desktop_escape_string (const char *s, bool is_list)
510 {
511   char *buffer, *p;
512 
513   p = buffer = XNMALLOC (strlen (s) * 2 + 1, char);
514 
515   /* The first character must not be a whitespace.  */
516   if (*s == ' ')
517     {
518       p = stpcpy (p, "\\s");
519       s++;
520     }
521   else if (*s == '\t')
522     {
523       p = stpcpy (p, "\\t");
524       s++;
525     }
526 
527   for (;; s++)
528     {
529       if (*s == '\0')
530         {
531           *p = '\0';
532           break;
533         }
534 
535       switch (*s)
536         {
537         case '\n':
538           p = stpcpy (p, "\\n");
539           break;
540         case '\r':
541           p = stpcpy (p, "\\r");
542           break;
543         case '\\':
544           if (is_list && *(s + 1) == ';')
545             {
546               p = stpcpy (p, "\\;");
547               s++;
548             }
549           else
550             p = stpcpy (p, "\\\\");
551           break;
552         default:
553           *p++ = *s;
554           break;
555         }
556     }
557 
558   return buffer;
559 }
560 
561 char *
desktop_unescape_string(const char * s,bool is_list)562 desktop_unescape_string (const char *s, bool is_list)
563 {
564   char *buffer, *p;
565 
566   p = buffer = XNMALLOC (strlen (s) + 1, char);
567   for (;; s++)
568     {
569       if (*s == '\0')
570         {
571           *p = '\0';
572           break;
573         }
574 
575       if (*s == '\\')
576         {
577           s++;
578 
579           if (*s == '\0')
580             {
581               *p = '\0';
582               break;
583             }
584 
585           switch (*s)
586             {
587             case 's':
588               *p++ = ' ';
589               break;
590             case 'n':
591               *p++ = '\n';
592               break;
593             case 't':
594               *p++ = '\t';
595               break;
596             case 'r':
597               *p++ = '\r';
598               break;
599             case ';':
600               p = stpcpy (p, "\\;");
601               break;
602             default:
603               *p++ = *s;
604               break;
605             }
606         }
607       else
608         *p++ = *s;
609     }
610   return buffer;
611 }
612 
613 void
desktop_add_keyword(hash_table * keywords,const char * name,bool is_list)614 desktop_add_keyword (hash_table *keywords, const char *name, bool is_list)
615 {
616   hash_insert_entry (keywords, name, strlen (name), (void *) is_list);
617 }
618 
619 void
desktop_add_default_keywords(hash_table * keywords)620 desktop_add_default_keywords (hash_table *keywords)
621 {
622   /* When adding new keywords here, also update the documentation in
623      xgettext.texi!  */
624   desktop_add_keyword (keywords, "Name", false);
625   desktop_add_keyword (keywords, "GenericName", false);
626   desktop_add_keyword (keywords, "Comment", false);
627 #if 0 /* Icon values are localizable, but not supported by xgettext.  */
628   desktop_add_keyword (keywords, "Icon", false);
629 #endif
630   desktop_add_keyword (keywords, "Keywords", true);
631 }
632