1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimeglob.c: Private file.  Datastructure for storing the globs.
3  *
4  * More info can be found at http://www.freedesktop.org/standards/
5  *
6  * Copyright (C) 2003  Red Hat, Inc.
7  * Copyright (C) 2003  Jonathan Blandford <jrb@alum.mit.edu>
8  *
9  * Licensed under the Academic Free License version 2.0
10  * Or under the following terms:
11  *
12  * This library is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
24  */
25 
26 #include "config.h"
27 
28 #include "xdgmimeglob.h"
29 #include "xdgmimeint.h"
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <assert.h>
33 #include <string.h>
34 #include <fnmatch.h>
35 
36 #ifndef MAX
37 #define MAX(a,b) ((a) > (b) ? (a) : (b))
38 #endif
39 
40 #ifndef	FALSE
41 #define	FALSE	(0)
42 #endif
43 
44 #ifndef	TRUE
45 #define	TRUE	(!FALSE)
46 #endif
47 
48 typedef struct XdgGlobHashNode XdgGlobHashNode;
49 typedef struct XdgGlobList XdgGlobList;
50 
51 struct XdgGlobHashNode
52 {
53   xdg_unichar_t character;
54   const char *mime_type;
55   int weight;
56   int case_sensitive;
57   XdgGlobHashNode *next;
58   XdgGlobHashNode *child;
59 };
60 struct XdgGlobList
61 {
62   const char *data;
63   const char *mime_type;
64   int weight;
65   int case_sensitive;
66   XdgGlobList *next;
67 };
68 
69 struct XdgGlobHash
70 {
71   XdgGlobList *literal_list;
72   XdgGlobHashNode *simple_node;
73   XdgGlobList *full_list;
74 };
75 
76 
77 /* XdgGlobList
78  */
79 static XdgGlobList *
_xdg_glob_list_new(void)80 _xdg_glob_list_new (void)
81 {
82   XdgGlobList *new_element;
83 
84   new_element = calloc (1, sizeof (XdgGlobList));
85 
86   return new_element;
87 }
88 
89 /* Frees glob_list and all of it's children */
90 static void
_xdg_glob_list_free(XdgGlobList * glob_list)91 _xdg_glob_list_free (XdgGlobList *glob_list)
92 {
93   XdgGlobList *ptr, *next;
94 
95   ptr = glob_list;
96 
97   while (ptr != NULL)
98     {
99       next = ptr->next;
100 
101       if (ptr->data)
102 	free ((void *) ptr->data);
103       if (ptr->mime_type)
104 	free ((void *) ptr->mime_type);
105       free (ptr);
106 
107       ptr = next;
108     }
109 }
110 
111 static XdgGlobList *
_xdg_glob_list_append(XdgGlobList * glob_list,void * data,const char * mime_type,int weight,int case_sensitive)112 _xdg_glob_list_append (XdgGlobList *glob_list,
113 		       void        *data,
114 		       const char  *mime_type,
115 		       int          weight,
116 		       int          case_sensitive)
117 {
118   XdgGlobList *new_element;
119   XdgGlobList *tmp_element;
120 
121   tmp_element = glob_list;
122   while (tmp_element != NULL)
123     {
124       if (strcmp (tmp_element->data, data) == 0 &&
125 	  strcmp (tmp_element->mime_type, mime_type) == 0)
126 	return glob_list;
127 
128       tmp_element = tmp_element->next;
129     }
130 
131   new_element = _xdg_glob_list_new ();
132   new_element->data = data;
133   new_element->mime_type = mime_type;
134   new_element->weight = weight;
135   new_element->case_sensitive = case_sensitive;
136   if (glob_list == NULL)
137     return new_element;
138 
139   tmp_element = glob_list;
140   while (tmp_element->next != NULL)
141     tmp_element = tmp_element->next;
142 
143   tmp_element->next = new_element;
144 
145   return glob_list;
146 }
147 
148 /* XdgGlobHashNode
149  */
150 
151 static XdgGlobHashNode *
_xdg_glob_hash_node_new(void)152 _xdg_glob_hash_node_new (void)
153 {
154   XdgGlobHashNode *glob_hash_node;
155 
156   glob_hash_node = calloc (1, sizeof (XdgGlobHashNode));
157 
158   return glob_hash_node;
159 }
160 
161 #ifdef NOT_USED_IN_GIO
162 
163 static void
_xdg_glob_hash_node_dump(XdgGlobHashNode * glob_hash_node,int depth)164 _xdg_glob_hash_node_dump (XdgGlobHashNode *glob_hash_node,
165 			  int depth)
166 {
167   int i;
168   for (i = 0; i < depth; i++)
169     printf (" ");
170 
171   printf ("%c", (char)glob_hash_node->character);
172   if (glob_hash_node->mime_type)
173     printf (" - %s %d\n", glob_hash_node->mime_type, glob_hash_node->weight);
174   else
175     printf ("\n");
176   if (glob_hash_node->child)
177     _xdg_glob_hash_node_dump (glob_hash_node->child, depth + 1);
178   if (glob_hash_node->next)
179     _xdg_glob_hash_node_dump (glob_hash_node->next, depth);
180 }
181 
182 #endif
183 
184 static XdgGlobHashNode *
_xdg_glob_hash_insert_ucs4(XdgGlobHashNode * glob_hash_node,xdg_unichar_t * text,const char * mime_type,int weight,int case_sensitive)185 _xdg_glob_hash_insert_ucs4 (XdgGlobHashNode *glob_hash_node,
186 			    xdg_unichar_t   *text,
187 			    const char      *mime_type,
188 			    int              weight,
189 			    int              case_sensitive)
190 {
191   XdgGlobHashNode *node;
192   xdg_unichar_t character;
193 
194   character = text[0];
195 
196   if ((glob_hash_node == NULL) ||
197       (character < glob_hash_node->character))
198     {
199       node = _xdg_glob_hash_node_new ();
200       node->character = character;
201       node->next = glob_hash_node;
202       glob_hash_node = node;
203     }
204   else if (character == glob_hash_node->character)
205     {
206       node = glob_hash_node;
207     }
208   else
209     {
210       XdgGlobHashNode *prev_node;
211       int found_node = FALSE;
212 
213       /* Look for the first character of text in glob_hash_node, and insert it if we
214        * have to.*/
215       prev_node = glob_hash_node;
216       node = prev_node->next;
217 
218       while (node != NULL)
219 	{
220 	  if (character < node->character)
221 	    {
222 	      node = _xdg_glob_hash_node_new ();
223 	      node->character = character;
224 	      node->next = prev_node->next;
225 	      prev_node->next = node;
226 
227 	      found_node = TRUE;
228 	      break;
229 	    }
230 	  else if (character == node->character)
231 	    {
232 	      found_node = TRUE;
233 	      break;
234 	    }
235 	  prev_node = node;
236 	  node = node->next;
237 	}
238 
239       if (! found_node)
240 	{
241 	  node = _xdg_glob_hash_node_new ();
242 	  node->character = character;
243 	  node->next = prev_node->next;
244 	  prev_node->next = node;
245 	}
246     }
247 
248   text++;
249   if (*text == 0)
250     {
251       if (node->mime_type)
252 	{
253 	  if (strcmp (node->mime_type, mime_type) != 0)
254 	    {
255 	      XdgGlobHashNode *child;
256 	      int found_node = FALSE;
257 
258 	      child = node->child;
259 	      while (child && child->character == 0)
260 		{
261 		  if (strcmp (child->mime_type, mime_type) == 0)
262 		    {
263 		      found_node = TRUE;
264 		      break;
265 		    }
266 		  child = child->next;
267 		}
268 
269 	      if (!found_node)
270 		{
271 		  child = _xdg_glob_hash_node_new ();
272 		  child->character = 0;
273 		  child->mime_type = strdup (mime_type);
274 		  child->weight = weight;
275 		  child->case_sensitive = case_sensitive;
276 		  child->child = NULL;
277 		  child->next = node->child;
278 		  node->child = child;
279 		}
280 	    }
281 	}
282       else
283 	{
284 	  node->mime_type = strdup (mime_type);
285 	  node->weight = weight;
286 	  node->case_sensitive = case_sensitive;
287 	}
288     }
289   else
290     {
291       node->child = _xdg_glob_hash_insert_ucs4 (node->child, text, mime_type, weight, case_sensitive);
292     }
293   return glob_hash_node;
294 }
295 
296 /* glob must be valid UTF-8 */
297 static XdgGlobHashNode *
_xdg_glob_hash_insert_text(XdgGlobHashNode * glob_hash_node,const char * text,const char * mime_type,int weight,int case_sensitive)298 _xdg_glob_hash_insert_text (XdgGlobHashNode *glob_hash_node,
299 			    const char      *text,
300 			    const char      *mime_type,
301 			    int              weight,
302 			    int              case_sensitive)
303 {
304   XdgGlobHashNode *node;
305   xdg_unichar_t *unitext;
306   int len;
307 
308   unitext = _xdg_convert_to_ucs4 (text, &len);
309   _xdg_reverse_ucs4 (unitext, len);
310   node = _xdg_glob_hash_insert_ucs4 (glob_hash_node, unitext, mime_type, weight, case_sensitive);
311   free (unitext);
312   return node;
313 }
314 
315 typedef struct {
316   const char *mime;
317   int weight;
318 } MimeWeight;
319 
320 static int
_xdg_glob_hash_node_lookup_file_name(XdgGlobHashNode * glob_hash_node,const char * file_name,int len,int case_sensitive_check,MimeWeight mime_types[],int n_mime_types)321 _xdg_glob_hash_node_lookup_file_name (XdgGlobHashNode *glob_hash_node,
322 				      const char      *file_name,
323 				      int              len,
324 				      int              case_sensitive_check,
325 				      MimeWeight       mime_types[],
326 				      int              n_mime_types)
327 {
328   int n;
329   XdgGlobHashNode *node;
330   xdg_unichar_t character;
331 
332   if (glob_hash_node == NULL)
333     return 0;
334 
335   character = file_name[len - 1];
336 
337   for (node = glob_hash_node; node && character >= node->character; node = node->next)
338     {
339       if (character == node->character)
340         {
341           len--;
342           n = 0;
343           if (len > 0)
344 	    {
345 	      n = _xdg_glob_hash_node_lookup_file_name (node->child,
346 							file_name,
347 							len,
348 							case_sensitive_check,
349 							mime_types,
350 							n_mime_types);
351 	    }
352 	  if (n == 0)
353 	    {
354               if (node->mime_type &&
355 		  (case_sensitive_check ||
356 		   !node->case_sensitive))
357                 {
358 	          mime_types[n].mime = node->mime_type;
359 		  mime_types[n].weight = node->weight;
360 		  n++;
361                 }
362 	      node = node->child;
363 	      while (n < n_mime_types && node && node->character == 0)
364 		{
365                   if (node->mime_type &&
366 		      (case_sensitive_check ||
367 		       !node->case_sensitive))
368 		    {
369 		      mime_types[n].mime = node->mime_type;
370 		      mime_types[n].weight = node->weight;
371 		      n++;
372 		    }
373 		  node = node->next;
374 		}
375 	    }
376 	  return n;
377 	}
378     }
379 
380   return 0;
381 }
382 
compare_mime_weight(const void * a,const void * b)383 static int compare_mime_weight (const void *a, const void *b)
384 {
385   const MimeWeight *aa = (const MimeWeight *)a;
386   const MimeWeight *bb = (const MimeWeight *)b;
387 
388   return bb->weight - aa->weight;
389 }
390 
391 #define ISUPPER(c)		((c) >= 'A' && (c) <= 'Z')
392 static char *
ascii_tolower(const char * str)393 ascii_tolower (const char *str)
394 {
395   char *p, *lower;
396 
397   lower = strdup (str);
398   p = lower;
399   while (*p != 0)
400     {
401       char c = *p;
402       *p++ = ISUPPER (c) ? c - 'A' + 'a' : c;
403     }
404   return lower;
405 }
406 
407 static int
filter_out_dupes(MimeWeight mimes[],int n_mimes)408 filter_out_dupes (MimeWeight mimes[], int n_mimes)
409 {
410   int last;
411   int i, j;
412 
413   last = n_mimes;
414 
415   for (i = 0; i < last; i++)
416     {
417       j = i + 1;
418       while (j < last)
419         {
420           if (strcmp (mimes[i].mime, mimes[j].mime) == 0)
421             {
422               mimes[i].weight = MAX (mimes[i].weight, mimes[j].weight);
423               last--;
424               mimes[j].mime = mimes[last].mime;
425               mimes[j].weight = mimes[last].weight;
426             }
427           else
428             j++;
429         }
430     }
431 
432   return last;
433 }
434 
435 int
_xdg_glob_hash_lookup_file_name(XdgGlobHash * glob_hash,const char * file_name,const char * mime_types[],int n_mime_types)436 _xdg_glob_hash_lookup_file_name (XdgGlobHash *glob_hash,
437 				 const char  *file_name,
438 				 const char  *mime_types[],
439 				 int          n_mime_types)
440 {
441   XdgGlobList *list;
442   int i, n;
443   MimeWeight mimes[10];
444   int n_mimes = 10;
445   int len;
446   char *lower_case;
447 
448   /* First, check the literals */
449 
450   assert (file_name != NULL && n_mime_types > 0);
451 
452   n = 0;
453 
454   lower_case = ascii_tolower (file_name);
455 
456   for (list = glob_hash->literal_list; list; list = list->next)
457     {
458       if (strcmp ((const char *)list->data, file_name) == 0)
459 	{
460 	  mime_types[0] = list->mime_type;
461 	  free (lower_case);
462 	  return 1;
463 	}
464     }
465 
466   for (list = glob_hash->literal_list; list; list = list->next)
467     {
468       if (!list->case_sensitive &&
469 	  strcmp ((const char *)list->data, lower_case) == 0)
470 	{
471 	  mime_types[0] = list->mime_type;
472 	  free (lower_case);
473 	  return 1;
474 	}
475     }
476 
477 
478   len = strlen (file_name);
479   n = _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, lower_case, len, FALSE,
480 					    mimes, n_mimes);
481   if (n < 2)
482     n += _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, file_name, len, TRUE,
483 					       mimes + n, n_mimes - n);
484 
485   if (n < 2)
486     {
487       for (list = glob_hash->full_list; list && n < n_mime_types; list = list->next)
488         {
489           if (fnmatch ((const char *)list->data, file_name, 0) == 0)
490 	    {
491 	      mimes[n].mime = list->mime_type;
492 	      mimes[n].weight = list->weight;
493 	      n++;
494 	    }
495         }
496     }
497   free (lower_case);
498 
499   n = filter_out_dupes (mimes, n);
500 
501   qsort (mimes, n, sizeof (MimeWeight), compare_mime_weight);
502 
503   if (n_mime_types < n)
504     n = n_mime_types;
505 
506   for (i = 0; i < n; i++)
507     mime_types[i] = mimes[i].mime;
508 
509   return n;
510 }
511 
512 
513 
514 /* XdgGlobHash
515  */
516 
517 XdgGlobHash *
_xdg_glob_hash_new(void)518 _xdg_glob_hash_new (void)
519 {
520   XdgGlobHash *glob_hash;
521 
522   glob_hash = calloc (1, sizeof (XdgGlobHash));
523 
524   return glob_hash;
525 }
526 
527 
528 static void
_xdg_glob_hash_free_nodes(XdgGlobHashNode * node)529 _xdg_glob_hash_free_nodes (XdgGlobHashNode *node)
530 {
531   if (node)
532     {
533       if (node->child)
534        _xdg_glob_hash_free_nodes (node->child);
535       if (node->next)
536        _xdg_glob_hash_free_nodes (node->next);
537       if (node->mime_type)
538 	free ((void *) node->mime_type);
539       free (node);
540     }
541 }
542 
543 void
_xdg_glob_hash_free(XdgGlobHash * glob_hash)544 _xdg_glob_hash_free (XdgGlobHash *glob_hash)
545 {
546   _xdg_glob_list_free (glob_hash->literal_list);
547   _xdg_glob_list_free (glob_hash->full_list);
548   _xdg_glob_hash_free_nodes (glob_hash->simple_node);
549   free (glob_hash);
550 }
551 
552 XdgGlobType
_xdg_glob_determine_type(const char * glob)553 _xdg_glob_determine_type (const char *glob)
554 {
555   const char *ptr;
556   int maybe_in_simple_glob = FALSE;
557   int first_char = TRUE;
558 
559   ptr = glob;
560 
561   while (*ptr != '\0')
562     {
563       if (*ptr == '*' && first_char)
564 	maybe_in_simple_glob = TRUE;
565       else if (*ptr == '\\' || *ptr == '[' || *ptr == '?' || *ptr == '*')
566 	  return XDG_GLOB_FULL;
567 
568       first_char = FALSE;
569       ptr = _xdg_utf8_next_char (ptr);
570     }
571   if (maybe_in_simple_glob)
572     return XDG_GLOB_SIMPLE;
573   else
574     return XDG_GLOB_LITERAL;
575 }
576 
577 /* glob must be valid UTF-8 */
578 void
_xdg_glob_hash_append_glob(XdgGlobHash * glob_hash,const char * glob,const char * mime_type,int weight,int case_sensitive)579 _xdg_glob_hash_append_glob (XdgGlobHash *glob_hash,
580 			    const char  *glob,
581 			    const char  *mime_type,
582 			    int          weight,
583 			    int          case_sensitive)
584 {
585   XdgGlobType type;
586 
587   assert (glob_hash != NULL);
588   assert (glob != NULL);
589 
590   type = _xdg_glob_determine_type (glob);
591 
592   switch (type)
593     {
594     case XDG_GLOB_LITERAL:
595       glob_hash->literal_list = _xdg_glob_list_append (glob_hash->literal_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
596       break;
597     case XDG_GLOB_SIMPLE:
598       glob_hash->simple_node = _xdg_glob_hash_insert_text (glob_hash->simple_node, glob + 1, mime_type, weight, case_sensitive);
599       break;
600     case XDG_GLOB_FULL:
601       glob_hash->full_list = _xdg_glob_list_append (glob_hash->full_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
602       break;
603     }
604 }
605 
606 #ifdef NOT_USED_IN_GIO
607 
608 void
_xdg_glob_hash_dump(XdgGlobHash * glob_hash)609 _xdg_glob_hash_dump (XdgGlobHash *glob_hash)
610 {
611   XdgGlobList *list;
612   printf ("LITERAL STRINGS\n");
613   if (!glob_hash || glob_hash->literal_list == NULL)
614     {
615       printf ("    None\n");
616     }
617   else
618     {
619       for (list = glob_hash->literal_list; list; list = list->next)
620 	printf ("    %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
621     }
622   printf ("\nSIMPLE GLOBS\n");
623   if (!glob_hash || glob_hash->simple_node == NULL)
624     {
625       printf ("    None\n");
626     }
627   else
628     {
629       _xdg_glob_hash_node_dump (glob_hash->simple_node, 4);
630     }
631 
632   printf ("\nFULL GLOBS\n");
633   if (!glob_hash || glob_hash->full_list == NULL)
634     {
635       printf ("    None\n");
636     }
637   else
638     {
639       for (list = glob_hash->full_list; list; list = list->next)
640 	printf ("    %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
641     }
642 }
643 
644 #endif
645 
646 void
_xdg_mime_glob_read_from_file(XdgGlobHash * glob_hash,const char * file_name,int version_two)647 _xdg_mime_glob_read_from_file (XdgGlobHash *glob_hash,
648 			       const char  *file_name,
649 			       int          version_two)
650 {
651   FILE *glob_file;
652   char line[255];
653   char *p;
654 
655   glob_file = fopen (file_name, "r");
656 
657   if (glob_file == NULL)
658     return;
659 
660   /* FIXME: Not UTF-8 safe.  Doesn't work if lines are greater than 255 chars.
661    * Blah */
662   while (fgets (line, 255, glob_file) != NULL)
663     {
664       char *colon;
665       char *mimetype, *glob, *end;
666       int weight;
667       int case_sensitive;
668 
669       if (line[0] == '#' || line[0] == 0)
670 	continue;
671 
672       end = line + strlen(line) - 1;
673       if (*end == '\n')
674 	*end = 0;
675 
676       p = line;
677       if (version_two)
678 	{
679 	  colon = strchr (p, ':');
680 	  if (colon == NULL)
681 	    continue;
682 	  *colon = 0;
683           weight = atoi (p);
684 	  p = colon + 1;
685 	}
686       else
687 	weight = 50;
688 
689       colon = strchr (p, ':');
690       if (colon == NULL)
691 	continue;
692       *colon = 0;
693 
694       mimetype = p;
695       p = colon + 1;
696       glob = p;
697       case_sensitive = FALSE;
698 
699       colon = strchr (p, ':');
700       if (version_two && colon != NULL)
701 	{
702 	  char *flag;
703 
704 	  /* We got flags */
705 	  *colon = 0;
706 	  p = colon + 1;
707 
708 	  /* Flags end at next colon */
709 	  colon = strchr (p, ':');
710 	  if (colon != NULL)
711 	    *colon = 0;
712 
713 	  flag = strstr (p, "cs");
714 	  if (flag != NULL &&
715 	      /* Start or after comma */
716 	      (flag == p ||
717 	       flag[-1] == ',') &&
718 	      /* ends with comma or end of string */
719 	      (flag[2] == 0 ||
720 	       flag[2] == ','))
721 	    case_sensitive = TRUE;
722 	}
723 
724       _xdg_glob_hash_append_glob (glob_hash, glob, mimetype, weight, case_sensitive);
725     }
726 
727   fclose (glob_file);
728 }
729