1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimeglob.c: Private file.  Datastructure for storing the globs.
3  *
4  * More info can be found at http://www.freedesktop.org/standards/
5  *
6  * Copyright (C) 2003  Red Hat, Inc.
7  * Copyright (C) 2003  Jonathan Blandford <jrb@alum.mit.edu>
8  *
9  * Licensed under the Academic Free License version 2.0
10  * Or under the following terms:
11  *
12  * This library is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2 of the License, or (at your option) any later version.
16  *
17  * This library is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with this library; if not, write to the
24  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
25  * Boston, MA 02111-1307, USA.
26  */
27 
28 #ifdef HAVE_CONFIG_H
29 #include <config.h>
30 #endif
31 
32 #include "xdgmimeglob.h"
33 #include "xdgmimeint.h"
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <assert.h>
37 #include <string.h>
38 #include <fnmatch.h>
39 
40 #ifndef	FALSE
41 #define	FALSE	(0)
42 #endif
43 
44 #ifndef	TRUE
45 #define	TRUE	(!FALSE)
46 #endif
47 
48 typedef struct XdgGlobHashNode XdgGlobHashNode;
49 typedef struct XdgGlobList XdgGlobList;
50 
51 struct XdgGlobHashNode
52 {
53   xdg_unichar_t character;
54   const char *mime_type;
55   int weight;
56   int case_sensitive;
57   XdgGlobHashNode *next;
58   XdgGlobHashNode *child;
59 };
60 struct XdgGlobList
61 {
62   const char *data;
63   const char *mime_type;
64   int weight;
65   int case_sensitive;
66   XdgGlobList *next;
67 };
68 
69 struct XdgGlobHash
70 {
71   XdgGlobList *literal_list;
72   XdgGlobHashNode *simple_node;
73   XdgGlobList *full_list;
74 };
75 
76 
77 /* XdgGlobList
78  */
79 static XdgGlobList *
_xdg_glob_list_new(void)80 _xdg_glob_list_new (void)
81 {
82   XdgGlobList *new_element;
83 
84   new_element = calloc (1, sizeof (XdgGlobList));
85 
86   return new_element;
87 }
88 
89 /* Frees glob_list and all of its children */
90 static void
_xdg_glob_list_free(XdgGlobList * glob_list)91 _xdg_glob_list_free (XdgGlobList *glob_list)
92 {
93   XdgGlobList *ptr, *next;
94 
95   ptr = glob_list;
96 
97   while (ptr != NULL)
98     {
99       next = ptr->next;
100 
101       if (ptr->data)
102 	free ((void *) ptr->data);
103       if (ptr->mime_type)
104 	free ((void *) ptr->mime_type);
105       free (ptr);
106 
107       ptr = next;
108     }
109 }
110 
111 static XdgGlobList *
_xdg_glob_list_append(XdgGlobList * glob_list,void * data,const char * mime_type,int weight,int case_sensitive)112 _xdg_glob_list_append (XdgGlobList *glob_list,
113 		       void        *data,
114 		       const char  *mime_type,
115 		       int          weight,
116 		       int          case_sensitive)
117 {
118   XdgGlobList *new_element;
119   XdgGlobList *tmp_element;
120 
121   tmp_element = glob_list;
122   while (tmp_element != NULL)
123     {
124       if (strcmp (tmp_element->data, data) == 0 &&
125 	  strcmp (tmp_element->mime_type, mime_type) == 0)
126 	return glob_list;
127 
128       tmp_element = tmp_element->next;
129     }
130 
131   new_element = _xdg_glob_list_new ();
132   new_element->data = data;
133   new_element->mime_type = mime_type;
134   new_element->weight = weight;
135   new_element->case_sensitive = case_sensitive;
136   if (glob_list == NULL)
137     return new_element;
138 
139   tmp_element = glob_list;
140   while (tmp_element->next != NULL)
141     tmp_element = tmp_element->next;
142 
143   tmp_element->next = new_element;
144 
145   return glob_list;
146 }
147 
148 /* XdgGlobHashNode
149  */
150 
151 static XdgGlobHashNode *
_xdg_glob_hash_node_new(void)152 _xdg_glob_hash_node_new (void)
153 {
154   XdgGlobHashNode *glob_hash_node;
155 
156   glob_hash_node = calloc (1, sizeof (XdgGlobHashNode));
157 
158   return glob_hash_node;
159 }
160 
161 static void
_xdg_glob_hash_node_dump(XdgGlobHashNode * glob_hash_node,int depth)162 _xdg_glob_hash_node_dump (XdgGlobHashNode *glob_hash_node,
163 			  int depth)
164 {
165   int i;
166   for (i = 0; i < depth; i++)
167     printf (" ");
168 
169   printf ("%c", (char)glob_hash_node->character);
170   if (glob_hash_node->mime_type)
171     printf (" - %s %d\n", glob_hash_node->mime_type, glob_hash_node->weight);
172   else
173     printf ("\n");
174   if (glob_hash_node->child)
175     _xdg_glob_hash_node_dump (glob_hash_node->child, depth + 1);
176   if (glob_hash_node->next)
177     _xdg_glob_hash_node_dump (glob_hash_node->next, depth);
178 }
179 
180 static XdgGlobHashNode *
_xdg_glob_hash_insert_ucs4(XdgGlobHashNode * glob_hash_node,xdg_unichar_t * text,const char * mime_type,int weight,int case_sensitive)181 _xdg_glob_hash_insert_ucs4 (XdgGlobHashNode *glob_hash_node,
182 			    xdg_unichar_t   *text,
183 			    const char      *mime_type,
184 			    int              weight,
185 			    int              case_sensitive)
186 {
187   XdgGlobHashNode *node;
188   xdg_unichar_t character;
189 
190   character = text[0];
191 
192   if ((glob_hash_node == NULL) ||
193       (character < glob_hash_node->character))
194     {
195       node = _xdg_glob_hash_node_new ();
196       node->character = character;
197       node->next = glob_hash_node;
198       glob_hash_node = node;
199     }
200   else if (character == glob_hash_node->character)
201     {
202       node = glob_hash_node;
203     }
204   else
205     {
206       XdgGlobHashNode *prev_node;
207       int found_node = FALSE;
208 
209       /* Look for the first character of text in glob_hash_node, and insert it if we
210        * have to.*/
211       prev_node = glob_hash_node;
212       node = prev_node->next;
213 
214       while (node != NULL)
215 	{
216 	  if (character < node->character)
217 	    {
218 	      node = _xdg_glob_hash_node_new ();
219 	      node->character = character;
220 	      node->next = prev_node->next;
221 	      prev_node->next = node;
222 
223 	      found_node = TRUE;
224 	      break;
225 	    }
226 	  else if (character == node->character)
227 	    {
228 	      found_node = TRUE;
229 	      break;
230 	    }
231 	  prev_node = node;
232 	  node = node->next;
233 	}
234 
235       if (! found_node)
236 	{
237 	  node = _xdg_glob_hash_node_new ();
238 	  node->character = character;
239 	  node->next = prev_node->next;
240 	  prev_node->next = node;
241 	}
242     }
243 
244   text++;
245   if (*text == 0)
246     {
247       if (node->mime_type)
248 	{
249 	  if (strcmp (node->mime_type, mime_type) != 0)
250 	    {
251 	      XdgGlobHashNode *child;
252 	      int found_node = FALSE;
253 
254 	      child = node->child;
255 	      while (child && child->character == 0)
256 		{
257 		  if (strcmp (child->mime_type, mime_type) == 0)
258 		    {
259 		      found_node = TRUE;
260 		      break;
261 		    }
262 		  child = child->next;
263 		}
264 
265 	      if (!found_node)
266 		{
267 		  child = _xdg_glob_hash_node_new ();
268 		  child->character = 0;
269 		  child->mime_type = strdup (mime_type);
270 		  child->weight = weight;
271 		  child->case_sensitive = case_sensitive;
272 		  child->child = NULL;
273 		  child->next = node->child;
274 		  node->child = child;
275 		}
276 	    }
277 	}
278       else
279 	{
280 	  node->mime_type = strdup (mime_type);
281 	  node->weight = weight;
282 	  node->case_sensitive = case_sensitive;
283 	}
284     }
285   else
286     {
287       node->child = _xdg_glob_hash_insert_ucs4 (node->child, text, mime_type, weight, case_sensitive);
288     }
289   return glob_hash_node;
290 }
291 
292 /* glob must be valid UTF-8 */
293 static XdgGlobHashNode *
_xdg_glob_hash_insert_text(XdgGlobHashNode * glob_hash_node,const char * text,const char * mime_type,int weight,int case_sensitive)294 _xdg_glob_hash_insert_text (XdgGlobHashNode *glob_hash_node,
295 			    const char      *text,
296 			    const char      *mime_type,
297 			    int              weight,
298 			    int              case_sensitive)
299 {
300   XdgGlobHashNode *node;
301   xdg_unichar_t *unitext;
302   int len;
303 
304   unitext = _xdg_convert_to_ucs4 (text, &len);
305   _xdg_reverse_ucs4 (unitext, len);
306   node = _xdg_glob_hash_insert_ucs4 (glob_hash_node, unitext, mime_type, weight, case_sensitive);
307   free (unitext);
308   return node;
309 }
310 
311 typedef struct {
312   const char *mime;
313   int weight;
314 } MimeWeight;
315 
316 static int
_xdg_glob_hash_node_lookup_file_name(XdgGlobHashNode * glob_hash_node,const char * file_name,int len,int case_sensitive_check,MimeWeight mime_types[],int n_mime_types)317 _xdg_glob_hash_node_lookup_file_name (XdgGlobHashNode *glob_hash_node,
318 				      const char      *file_name,
319 				      int              len,
320 				      int              case_sensitive_check,
321 				      MimeWeight       mime_types[],
322 				      int              n_mime_types)
323 {
324   int n;
325   XdgGlobHashNode *node;
326   xdg_unichar_t character;
327 
328   if (glob_hash_node == NULL)
329     return 0;
330 
331   character = file_name[len - 1];
332 
333   for (node = glob_hash_node; node && character >= node->character; node = node->next)
334     {
335       if (character == node->character)
336         {
337           len--;
338           n = 0;
339           if (len > 0)
340 	    {
341 	      n = _xdg_glob_hash_node_lookup_file_name (node->child,
342 							file_name,
343 							len,
344 							case_sensitive_check,
345 							mime_types,
346 							n_mime_types);
347 	    }
348 	  if (n == 0)
349 	    {
350               if (node->mime_type &&
351 		  (case_sensitive_check ||
352 		   !node->case_sensitive))
353                 {
354 	          mime_types[n].mime = node->mime_type;
355 		  mime_types[n].weight = node->weight;
356 		  n++;
357                 }
358 	      node = node->child;
359 	      while (n < n_mime_types && node && node->character == 0)
360 		{
361                   if (node->mime_type &&
362 		      (case_sensitive_check ||
363 		       !node->case_sensitive))
364 		    {
365 		      mime_types[n].mime = node->mime_type;
366 		      mime_types[n].weight = node->weight;
367 		      n++;
368 		    }
369 		  node = node->next;
370 		}
371 	    }
372 	  return n;
373 	}
374     }
375 
376   return 0;
377 }
378 
compare_mime_weight(const void * a,const void * b)379 static int compare_mime_weight (const void *a, const void *b)
380 {
381   const MimeWeight *aa = (const MimeWeight *)a;
382   const MimeWeight *bb = (const MimeWeight *)b;
383 
384   return bb->weight - aa->weight;
385 }
386 
387 #define ISUPPER(c)		((c) >= 'A' && (c) <= 'Z')
388 static char *
ascii_tolower(const char * str)389 ascii_tolower (const char *str)
390 {
391   char *p, *lower;
392 
393   lower = strdup (str);
394   p = lower;
395   while (*p != 0)
396     {
397       char c = *p;
398       *p++ = ISUPPER (c) ? c - 'A' + 'a' : c;
399     }
400   return lower;
401 }
402 
403 int
_xdg_glob_hash_lookup_file_name(XdgGlobHash * glob_hash,const char * file_name,const char * mime_types[],int n_mime_types)404 _xdg_glob_hash_lookup_file_name (XdgGlobHash *glob_hash,
405 				 const char  *file_name,
406 				 const char  *mime_types[],
407 				 int          n_mime_types)
408 {
409   XdgGlobList *list;
410   int i, n;
411   MimeWeight mimes[10];
412   int n_mimes = 10;
413   int len;
414   char *lower_case;
415 
416   /* First, check the literals */
417 
418   assert (file_name != NULL && n_mime_types > 0);
419 
420   n = 0;
421 
422   lower_case = ascii_tolower (file_name);
423 
424   for (list = glob_hash->literal_list; list; list = list->next)
425     {
426       if (strcmp ((const char *)list->data, file_name) == 0)
427 	{
428 	  mime_types[0] = list->mime_type;
429 	  free (lower_case);
430 	  return 1;
431 	}
432     }
433 
434   for (list = glob_hash->literal_list; list; list = list->next)
435     {
436       if (!list->case_sensitive &&
437 	  strcmp ((const char *)list->data, lower_case) == 0)
438 	{
439 	  mime_types[0] = list->mime_type;
440 	  free (lower_case);
441 	  return 1;
442 	}
443     }
444 
445 
446   len = strlen (file_name);
447   n = _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, lower_case, len, FALSE,
448 					    mimes, n_mimes);
449   if (n == 0)
450     n = _xdg_glob_hash_node_lookup_file_name (glob_hash->simple_node, file_name, len, TRUE,
451 					      mimes, n_mimes);
452 
453   if (n == 0)
454     {
455       for (list = glob_hash->full_list; list && n < n_mime_types; list = list->next)
456         {
457           if (fnmatch ((const char *)list->data, file_name, 0) == 0)
458 	    {
459 	      mimes[n].mime = list->mime_type;
460 	      mimes[n].weight = list->weight;
461 	      n++;
462 	    }
463         }
464     }
465   free (lower_case);
466 
467   qsort (mimes, n, sizeof (MimeWeight), compare_mime_weight);
468 
469   if (n_mime_types < n)
470     n = n_mime_types;
471 
472   for (i = 0; i < n; i++)
473     mime_types[i] = mimes[i].mime;
474 
475   return n;
476 }
477 
478 
479 
480 /* XdgGlobHash
481  */
482 
483 XdgGlobHash *
_xdg_glob_hash_new(void)484 _xdg_glob_hash_new (void)
485 {
486   XdgGlobHash *glob_hash;
487 
488   glob_hash = calloc (1, sizeof (XdgGlobHash));
489 
490   return glob_hash;
491 }
492 
493 
494 static void
_xdg_glob_hash_free_nodes(XdgGlobHashNode * node)495 _xdg_glob_hash_free_nodes (XdgGlobHashNode *node)
496 {
497   if (node)
498     {
499       if (node->child)
500        _xdg_glob_hash_free_nodes (node->child);
501       if (node->next)
502        _xdg_glob_hash_free_nodes (node->next);
503       if (node->mime_type)
504 	free ((void *) node->mime_type);
505       free (node);
506     }
507 }
508 
509 void
_xdg_glob_hash_free(XdgGlobHash * glob_hash)510 _xdg_glob_hash_free (XdgGlobHash *glob_hash)
511 {
512   _xdg_glob_list_free (glob_hash->literal_list);
513   _xdg_glob_list_free (glob_hash->full_list);
514   _xdg_glob_hash_free_nodes (glob_hash->simple_node);
515   free (glob_hash);
516 }
517 
518 XdgGlobType
_xdg_glob_determine_type(const char * glob)519 _xdg_glob_determine_type (const char *glob)
520 {
521   const char *ptr;
522   int maybe_in_simple_glob = FALSE;
523   int first_char = TRUE;
524 
525   ptr = glob;
526 
527   while (*ptr != '\0')
528     {
529       if (*ptr == '*' && first_char)
530 	maybe_in_simple_glob = TRUE;
531       else if (*ptr == '\\' || *ptr == '[' || *ptr == '?' || *ptr == '*')
532 	  return XDG_GLOB_FULL;
533 
534       first_char = FALSE;
535       ptr = _xdg_utf8_next_char (ptr);
536     }
537   if (maybe_in_simple_glob)
538     return XDG_GLOB_SIMPLE;
539   else
540     return XDG_GLOB_LITERAL;
541 }
542 
543 /* glob must be valid UTF-8 */
544 void
_xdg_glob_hash_append_glob(XdgGlobHash * glob_hash,const char * glob,const char * mime_type,int weight,int case_sensitive)545 _xdg_glob_hash_append_glob (XdgGlobHash *glob_hash,
546 			    const char  *glob,
547 			    const char  *mime_type,
548 			    int          weight,
549 			    int          case_sensitive)
550 {
551   XdgGlobType type;
552 
553   assert (glob_hash != NULL);
554   assert (glob != NULL);
555 
556   type = _xdg_glob_determine_type (glob);
557 
558   switch (type)
559     {
560     case XDG_GLOB_LITERAL:
561       glob_hash->literal_list = _xdg_glob_list_append (glob_hash->literal_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
562       break;
563     case XDG_GLOB_SIMPLE:
564       glob_hash->simple_node = _xdg_glob_hash_insert_text (glob_hash->simple_node, glob + 1, mime_type, weight, case_sensitive);
565       break;
566     case XDG_GLOB_FULL:
567       glob_hash->full_list = _xdg_glob_list_append (glob_hash->full_list, strdup (glob), strdup (mime_type), weight, case_sensitive);
568       break;
569     }
570 }
571 
572 void
_xdg_glob_hash_dump(XdgGlobHash * glob_hash)573 _xdg_glob_hash_dump (XdgGlobHash *glob_hash)
574 {
575   XdgGlobList *list;
576   printf ("LITERAL STRINGS\n");
577   if (!glob_hash || glob_hash->literal_list == NULL)
578     {
579       printf ("    None\n");
580     }
581   else
582     {
583       for (list = glob_hash->literal_list; list; list = list->next)
584 	printf ("    %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
585     }
586   printf ("\nSIMPLE GLOBS\n");
587   if (!glob_hash || glob_hash->simple_node == NULL)
588     {
589       printf ("    None\n");
590     }
591   else
592     {
593       _xdg_glob_hash_node_dump (glob_hash->simple_node, 4);
594     }
595 
596   printf ("\nFULL GLOBS\n");
597   if (!glob_hash || glob_hash->full_list == NULL)
598     {
599       printf ("    None\n");
600     }
601   else
602     {
603       for (list = glob_hash->full_list; list; list = list->next)
604 	printf ("    %s - %s %d\n", (char *)list->data, list->mime_type, list->weight);
605     }
606 }
607 
608 
609 void
_xdg_mime_glob_read_from_file(XdgGlobHash * glob_hash,const char * file_name,int version_two)610 _xdg_mime_glob_read_from_file (XdgGlobHash *glob_hash,
611 			       const char  *file_name,
612 			       int          version_two)
613 {
614   FILE *glob_file;
615   char line[255];
616   char *p;
617 
618   glob_file = fopen (file_name, "r");
619 
620   if (glob_file == NULL)
621     return;
622 
623   /* FIXME: Not UTF-8 safe.  Doesn't work if lines are greater than 255 chars.
624    * Blah */
625   while (fgets (line, 255, glob_file) != NULL)
626     {
627       char *colon;
628       char *mimetype, *glob, *end;
629       int weight;
630       int case_sensitive;
631 
632       if (line[0] == '#' || line[0] == 0)
633 	continue;
634 
635       end = line + strlen(line) - 1;
636       if (*end == '\n')
637 	*end = 0;
638 
639       p = line;
640       if (version_two)
641 	{
642 	  colon = strchr (p, ':');
643 	  if (colon == NULL)
644 	    continue;
645 	  *colon = 0;
646           weight = atoi (p);
647 	  p = colon + 1;
648 	}
649       else
650 	weight = 50;
651 
652       colon = strchr (p, ':');
653       if (colon == NULL)
654 	continue;
655       *colon = 0;
656 
657       mimetype = p;
658       p = colon + 1;
659       glob = p;
660       case_sensitive = FALSE;
661 
662       colon = strchr (p, ':');
663       if (version_two && colon != NULL)
664 	{
665 	  char *flag;
666 
667 	  /* We got flags */
668 	  *colon = 0;
669 	  p = colon + 1;
670 
671 	  /* Flags end at next colon */
672 	  colon = strchr (p, ':');
673 	  if (colon != NULL)
674 	    *colon = 0;
675 
676 	  flag = strstr (p, "cs");
677 	  if (flag != NULL &&
678 	      /* Start or after comma */
679 	      (flag == p ||
680 	       flag[-1] == ',') &&
681 	      /* ends with comma or end of string */
682 	      (flag[2] == 0 ||
683 	       flag[2] == ','))
684 	    case_sensitive = TRUE;
685 	}
686 
687       _xdg_glob_hash_append_glob (glob_hash, glob, mimetype, weight, case_sensitive);
688     }
689 
690   fclose (glob_file);
691 }
692