1 /*
2  * Copyright (C) 2006, Jamie McCracken <jamiemcc@gnome.org>
3  * Copyright (C) 2008, Nokia <ivan.frade@nokia.com>
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  * Boston, MA  02110-1301, USA.
19  */
20 
21 #include "config.h"
22 
23 #include <string.h>
24 #include <unistd.h>
25 #include <sys/types.h>
26 #include <sys/stat.h>
27 #include <sys/statvfs.h>
28 #include <sys/file.h>
29 #include <fcntl.h>
30 #include <limits.h>
31 #include <errno.h>
32 
33 #ifdef __linux__
34 #include <sys/statfs.h>
35 #endif
36 
37 #include <glib.h>
38 #include <glib/gstdio.h>
39 #include <gio/gio.h>
40 
41 #include "tracker-log.h"
42 #include "tracker-file-utils.h"
43 #include "tracker-type-utils.h"
44 
45 #define TEXT_SNIFF_SIZE 4096
46 
47 int
tracker_file_open_fd(const gchar * path)48 tracker_file_open_fd (const gchar *path)
49 {
50 	int fd;
51 
52 	g_return_val_if_fail (path != NULL, -1);
53 
54 #if defined(__linux__)
55 	fd = g_open (path, O_RDONLY | O_NOATIME, 0);
56 	if (fd == -1 && errno == EPERM) {
57 		fd = g_open (path, O_RDONLY, 0);
58 	}
59 #else
60 	fd = g_open (path, O_RDONLY, 0);
61 #endif
62 
63 	return fd;
64 }
65 
66 FILE *
tracker_file_open(const gchar * path)67 tracker_file_open (const gchar *path)
68 {
69 	FILE *file;
70 	int fd;
71 
72 	g_return_val_if_fail (path != NULL, NULL);
73 
74 	fd = tracker_file_open_fd (path);
75 
76 	if (fd == -1) {
77 		return NULL;
78 	}
79 
80 	file = fdopen (fd, "r");
81 
82 	if (!file) {
83 		return NULL;
84 	}
85 
86 	return file;
87 }
88 
89 void
tracker_file_close(FILE * file,gboolean need_again_soon)90 tracker_file_close (FILE     *file,
91                     gboolean  need_again_soon)
92 {
93 	g_return_if_fail (file != NULL);
94 
95 #ifdef HAVE_POSIX_FADVISE
96 	if (!need_again_soon) {
97 		if (posix_fadvise (fileno (file), 0, 0, POSIX_FADV_DONTNEED) != 0)
98 			g_warning ("posix_fadvise() call failed: %m");
99 	}
100 #endif /* HAVE_POSIX_FADVISE */
101 
102 	fclose (file);
103 }
104 
105 goffset
tracker_file_get_size(const gchar * path)106 tracker_file_get_size (const gchar *path)
107 {
108 	GFileInfo *info;
109 	GFile     *file;
110 	GError    *error = NULL;
111 	goffset    size;
112 
113 	g_return_val_if_fail (path != NULL, 0);
114 
115 	file = g_file_new_for_path (path);
116 	info = g_file_query_info (file,
117 	                          G_FILE_ATTRIBUTE_STANDARD_SIZE,
118 	                          G_FILE_QUERY_INFO_NONE,
119 	                          NULL,
120 	                          &error);
121 
122 	if (G_UNLIKELY (error)) {
123 		gchar *uri;
124 
125 		uri = g_file_get_uri (file);
126 		g_message ("Could not get size for '%s', %s",
127 		           uri,
128 		           error->message);
129 		g_free (uri);
130 		g_error_free (error);
131 		size = 0;
132 	} else {
133 		size = g_file_info_get_size (info);
134 		g_object_unref (info);
135 	}
136 
137 	g_object_unref (file);
138 
139 	return size;
140 }
141 
142 static
143 guint64
file_get_mtime(GFile * file)144 file_get_mtime (GFile *file)
145 {
146 	GFileInfo *info;
147 	GError    *error = NULL;
148 	guint64    mtime;
149 
150 	info = g_file_query_info (file,
151 	                          G_FILE_ATTRIBUTE_TIME_MODIFIED,
152 	                          G_FILE_QUERY_INFO_NONE,
153 	                          NULL,
154 	                          &error);
155 
156 	if (G_UNLIKELY (error)) {
157 		gchar *uri;
158 
159 		uri = g_file_get_uri (file);
160 		g_message ("Could not get mtime for '%s': %s",
161 		           uri,
162 		           error->message);
163 		g_free (uri);
164 		g_error_free (error);
165 		mtime = 0;
166 	} else {
167 		mtime = g_file_info_get_attribute_uint64 (info, G_FILE_ATTRIBUTE_TIME_MODIFIED);
168 		g_object_unref (info);
169 	}
170 
171 	return mtime;
172 }
173 
174 guint64
tracker_file_get_mtime(const gchar * path)175 tracker_file_get_mtime (const gchar *path)
176 {
177 	GFile     *file;
178 	guint64    mtime;
179 
180 	g_return_val_if_fail (path != NULL, 0);
181 
182 	file = g_file_new_for_path (path);
183 
184 	mtime = file_get_mtime (file);
185 
186 	g_object_unref (file);
187 
188 	return mtime;
189 }
190 
191 
192 guint64
tracker_file_get_mtime_uri(const gchar * uri)193 tracker_file_get_mtime_uri (const gchar *uri)
194 {
195 	GFile     *file;
196 	guint64    mtime;
197 
198 	g_return_val_if_fail (uri != NULL, 0);
199 
200 	file = g_file_new_for_uri (uri);
201 
202 	mtime = file_get_mtime (file);
203 
204 	g_object_unref (file);
205 
206 	return mtime;
207 }
208 
209 gchar *
tracker_file_get_mime_type(GFile * file)210 tracker_file_get_mime_type (GFile *file)
211 {
212 	GFileInfo *info;
213 	GError    *error = NULL;
214 	gchar     *content_type;
215 
216 	g_return_val_if_fail (G_IS_FILE (file), NULL);
217 
218 	info = g_file_query_info (file,
219 	                          G_FILE_ATTRIBUTE_STANDARD_CONTENT_TYPE,
220 	                          G_FILE_QUERY_INFO_NONE,
221 	                          NULL,
222 	                          &error);
223 
224 	if (G_UNLIKELY (error)) {
225 		gchar *uri;
226 
227 		uri = g_file_get_uri (file);
228 		g_message ("Could not guess mimetype for '%s', %s",
229 		           uri,
230 		           error->message);
231 		g_free (uri);
232 		g_error_free (error);
233 		content_type = NULL;
234 	} else {
235 		content_type = g_strdup (g_file_info_get_content_type (info));
236 		g_object_unref (info);
237 	}
238 
239 	return content_type ? content_type : g_strdup ("unknown");
240 }
241 
/* Pick the filesystem statistics call and the block size field used by
 * the disk space helpers in this file:
 *
 *  - Linux:     statfs() / statfs64() together with f_bsize
 *  - elsewhere: statvfs() / statvfs64() together with f_frsize
 *
 * __statvfs is used both as the function name and as the struct tag.
 */
#if defined(__linux__)

#  define __bsize f_bsize

#  if defined(__USE_LARGEFILE64)
#    define __statvfs statfs64
#  else
#    define __statvfs statfs
#  endif

#else /* !__linux__ */

#  define __bsize f_frsize

#  if defined(HAVE_STATVFS64)
#    define __statvfs statvfs64
#  else
#    define __statvfs statvfs
#  endif

#endif /* __linux__ */
263 
264 static gboolean
statvfs_helper(const gchar * path,struct __statvfs * st)265 statvfs_helper (const gchar *path, struct __statvfs *st)
266 {
267 	gchar *_path;
268 	int retval;
269 
270 //LCOV_EXCL_START
271 	/* Iterate up the path to the root until statvfs() doesn’t error with
272 	 * ENOENT. This prevents the call failing on first-startup when (for
273 	 * example) ~/.cache/tracker might not exist. */
274 	_path = g_strdup (path);
275 
276 	while ((retval = __statvfs (_path, st)) == -1 && errno == ENOENT) {
277 		gchar *tmp = g_path_get_dirname (_path);
278 		g_free (_path);
279 		_path = tmp;
280 	}
281 
282 	g_free (_path);
283 //LCOV_EXCL_STOP
284 
285 	if (retval == -1) {
286 		g_critical ("Could not statvfs() '%s': %s",
287 		            path,
288 		            g_strerror (errno));
289 	}
290 
291 	return (retval == 0);
292 }
293 
294 guint64
tracker_file_system_get_remaining_space(const gchar * path)295 tracker_file_system_get_remaining_space (const gchar *path)
296 {
297 	struct __statvfs st;
298 	guint64 available;
299 
300 	if (statvfs_helper (path, &st)) {
301 		available = (geteuid () == 0) ? st.f_bfree : st.f_bavail;
302 		/* __bsize is a platform dependent #define above */
303 		return st.__bsize * available;
304 	} else {
305 		return 0;
306 	}
307 }
308 
309 gdouble
tracker_file_system_get_remaining_space_percentage(const gchar * path)310 tracker_file_system_get_remaining_space_percentage (const gchar *path)
311 {
312 	struct __statvfs st;
313 	guint64 available;
314 
315 	if (statvfs_helper (path, &st)) {
316 		available = (geteuid () == 0) ? st.f_bfree : st.f_bavail;
317 		return (((gdouble) available * 100) / st.f_blocks);
318 	} else {
319 		return 0.0;
320 	}
321 }
322 
323 gboolean
tracker_file_system_has_enough_space(const gchar * path,gulong required_bytes,gboolean creating_db)324 tracker_file_system_has_enough_space (const gchar *path,
325                                       gulong       required_bytes,
326                                       gboolean     creating_db)
327 {
328 	gchar *str1;
329 	gchar *str2;
330 	gboolean enough;
331 	guint64 remaining;
332 
333 	g_return_val_if_fail (path != NULL, FALSE);
334 
335 	remaining = tracker_file_system_get_remaining_space (path);
336 	enough = (remaining >= required_bytes);
337 
338 	if (creating_db) {
339 		str1 = g_format_size (required_bytes);
340 		str2 = g_format_size (remaining);
341 
342 		if (!enough) {
343 			g_critical ("Not enough disk space to create databases, "
344 			            "%s remaining, %s required as a minimum",
345 			            str2,
346 			            str1);
347 		} else {
348 			g_debug ("Checking for adequate disk space to create databases, "
349 			         "%s remaining, %s required as a minimum",
350 			         str2,
351 			         str1);
352 		}
353 
354 		g_free (str2);
355 		g_free (str1);
356 	}
357 
358 	return enough;
359 }
360 
361 gboolean
tracker_path_is_in_path(const gchar * path,const gchar * in_path)362 tracker_path_is_in_path (const gchar *path,
363                          const gchar *in_path)
364 {
365 	gchar    *new_path;
366 	gchar    *new_in_path;
367 	gboolean  is_in_path = FALSE;
368 
369 	g_return_val_if_fail (path != NULL, FALSE);
370 	g_return_val_if_fail (in_path != NULL, FALSE);
371 
372 	if (!g_str_has_suffix (path, G_DIR_SEPARATOR_S)) {
373 		new_path = g_strconcat (path, G_DIR_SEPARATOR_S, NULL);
374 	} else {
375 		new_path = g_strdup (path);
376 	}
377 
378 	if (!g_str_has_suffix (in_path, G_DIR_SEPARATOR_S)) {
379 		new_in_path = g_strconcat (in_path, G_DIR_SEPARATOR_S, NULL);
380 	} else {
381 		new_in_path = g_strdup (in_path);
382 	}
383 
384 	if (g_str_has_prefix (new_path, new_in_path)) {
385 		is_in_path = TRUE;
386 	}
387 
388 	g_free (new_in_path);
389 	g_free (new_path);
390 
391 	return is_in_path;
392 }
393 
394 GSList *
tracker_path_list_filter_duplicates(GSList * roots,const gchar * basename_exception_prefix,gboolean is_recursive)395 tracker_path_list_filter_duplicates (GSList      *roots,
396                                      const gchar *basename_exception_prefix,
397                                      gboolean     is_recursive)
398 {
399 	GSList *l1, *l2;
400 	GSList *new_list;
401 
402 	new_list = tracker_gslist_copy_with_string_data (roots);
403 	l1 = new_list;
404 
405 	while (l1) {
406 		const gchar *path;
407 		gchar       *p;
408 		gboolean     reset = FALSE;
409 
410 		path = l1->data;
411 
412 		l2 = new_list;
413 
414 		while (l2 && !reset) {
415 			const gchar *in_path;
416 
417 			in_path = l2->data;
418 
419 			if (path == in_path) {
420 				/* Do nothing */
421 				l2 = l2->next;
422 				continue;
423 			}
424 
425 			if (basename_exception_prefix) {
426 				gchar *lbasename;
427 				gboolean has_prefix = FALSE;
428 
429 				lbasename = g_path_get_basename (path);
430 				if (!g_str_has_prefix (lbasename, basename_exception_prefix)) {
431 					g_free (lbasename);
432 
433 					lbasename = g_path_get_basename (in_path);
434 					if (g_str_has_prefix (lbasename, basename_exception_prefix)) {
435 						has_prefix = TRUE;
436 					}
437 				} else {
438 					has_prefix = TRUE;
439 				}
440 
441 				g_free (lbasename);
442 
443 				/* This is so we can ignore this check
444 				 * on files which prefix with ".".
445 				 */
446 				if (has_prefix) {
447 					l2 = l2->next;
448 					continue;
449 				}
450 			}
451 
452 			if (is_recursive && tracker_path_is_in_path (path, in_path)) {
453 				g_debug ("Removing path:'%s', it is in path:'%s'",
454 				         path, in_path);
455 
456 				g_free (l1->data);
457 				new_list = g_slist_delete_link (new_list, l1);
458 				l1 = new_list;
459 
460 				reset = TRUE;
461 
462 				continue;
463 			} else if (is_recursive && tracker_path_is_in_path (in_path, path)) {
464 				g_debug ("Removing path:'%s', it is in path:'%s'",
465 				         in_path, path);
466 
467 				g_free (l2->data);
468 				new_list = g_slist_delete_link (new_list, l2);
469 				l2 = new_list;
470 
471 				reset = TRUE;
472 
473 				continue;
474 			}
475 
476 			l2 = l2->next;
477 		}
478 
479 		if (G_LIKELY (!reset)) {
480 			p = strrchr (path, G_DIR_SEPARATOR);
481 
482 			/* Make sure the path doesn't have the '/' suffix. */
483 			if (p && !p[1]) {
484 				*p = '\0';
485 			}
486 
487 			l1 = l1->next;
488 		}
489 	}
490 
491 #ifdef TESTING
492 	g_debug ("GSList paths were filtered down to:");
493 
494 	if (TRUE) {
495 		GSList *l;
496 
497 		for (l = new_list; l; l = l->next) {
498 			g_debug ("  %s", (gchar*) l->data);
499 		}
500 	}
501 #endif /* TESTING */
502 
503 	return new_list;
504 }
505 
506 const struct {
507 	const gchar *symbol;
508 	GUserDirectory user_dir;
509 } special_dirs[] = {
510 	{"&DESKTOP",      G_USER_DIRECTORY_DESKTOP},
511 	{"&DOCUMENTS",    G_USER_DIRECTORY_DOCUMENTS},
512 	{"&DOWNLOAD",     G_USER_DIRECTORY_DOWNLOAD},
513 	{"&MUSIC",        G_USER_DIRECTORY_MUSIC},
514 	{"&PICTURES",     G_USER_DIRECTORY_PICTURES},
515 	{"&PUBLIC_SHARE", G_USER_DIRECTORY_PUBLIC_SHARE},
516 	{"&TEMPLATES",    G_USER_DIRECTORY_TEMPLATES},
517 	{"&VIDEOS",       G_USER_DIRECTORY_VIDEOS}
518 };
519 
520 
521 static gboolean
get_user_special_dir_if_not_home(const gchar * path,gchar ** special_dir)522 get_user_special_dir_if_not_home (const gchar  *path,
523                                   gchar       **special_dir)
524 {
525 	int i;
526 	const gchar *real_path;
527 	GFile *home, *file;
528 
529 	real_path = NULL;
530 	*special_dir = NULL;
531 
532 	for (i = 0; i < G_N_ELEMENTS(special_dirs); i++) {
533 		if (strcmp (path, special_dirs[i].symbol) == 0) {
534 			real_path = g_get_user_special_dir (special_dirs[i].user_dir);
535 
536 			if (real_path == NULL) {
537 				g_warning ("Unable to get XDG user directory path for special "
538 				           "directory %s. Ignoring this location.", path);
539 			}
540 
541 			break;
542 		}
543 	}
544 
545 	if (real_path == NULL)
546 		return FALSE;
547 
548 	file = g_file_new_for_path (real_path);
549 	home = g_file_new_for_path (g_get_home_dir ());
550 
551 	/* ignore XDG directories set to $HOME */
552 	if (!g_file_equal (file, home)) {
553 		*special_dir = g_strdup (real_path);
554 	}
555 
556 	g_object_unref (file);
557 	g_object_unref (home);
558 
559 	return TRUE;
560 }
561 
562 
563 gchar *
tracker_path_evaluate_name(const gchar * path)564 tracker_path_evaluate_name (const gchar *path)
565 {
566 	gchar        *special_dir_path;
567 	gchar        *final_path;
568 	gchar       **tokens;
569 	gchar       **token;
570 	gchar        *start;
571 	gchar        *end;
572 	const gchar  *env;
573 	gchar        *expanded;
574 
575 	if (!path || path[0] == '\0') {
576 		return NULL;
577 	}
578 
579 	/* See if it is a special directory name. */
580 	if (get_user_special_dir_if_not_home (path, &special_dir_path))
581 		return special_dir_path;
582 
583 	/* First check the simple case of using tilde */
584 	if (path[0] == '~') {
585 		const gchar *home;
586 
587 		home = g_getenv ("HOME");
588 		if (! home) {
589 			home = g_get_home_dir ();
590 		}
591 
592 		if (!home || home[0] == '\0') {
593 			return NULL;
594 		}
595 
596 		return g_build_path (G_DIR_SEPARATOR_S,
597 		                     home,
598 		                     path + 1,
599 		                     NULL);
600 	}
601 
602 	/* Second try to find any environment variables and expand
603 	 * them, like $HOME or ${FOO}
604 	 */
605 	tokens = g_strsplit (path, G_DIR_SEPARATOR_S, -1);
606 
607 	for (token = tokens; *token; token++) {
608 		if (**token != '$') {
609 			continue;
610 		}
611 
612 		start = *token + 1;
613 
614 		if (*start == '{') {
615 			start++;
616 			end = start + (strlen (start)) - 1;
617 			*end='\0';
618 		}
619 
620 		env = g_getenv (start);
621 		g_free (*token);
622 
623 		/* Don't do g_strdup (s?s1:s2) as that doesn't work
624 		 * with certain gcc 2.96 versions.
625 		 */
626 		*token = env ? g_strdup (env) : g_strdup ("");
627 	}
628 
629 	/* Third get the real path removing any "../" and other
630 	 * symbolic links to other places, returning only the REAL
631 	 * location.
632 	 */
633 	expanded = g_strjoinv (G_DIR_SEPARATOR_S, tokens);
634 	g_strfreev (tokens);
635 
636 	/* Only resolve relative paths if there is a directory
637 	 * separator in the path, otherwise it is just a name.
638 	 */
639 	if (strchr (expanded, G_DIR_SEPARATOR)) {
640 		GFile *file;
641 
642 		file = g_file_new_for_commandline_arg (expanded);
643 		final_path = g_file_get_path (file);
644 		g_object_unref (file);
645 		g_free (expanded);
646 	} else {
647 		final_path = expanded;
648 	}
649 
650 	return final_path;
651 }
652 
653 gboolean
tracker_file_is_hidden(GFile * file)654 tracker_file_is_hidden (GFile *file)
655 {
656 	GFileInfo *file_info;
657 	gboolean is_hidden = FALSE;
658 
659 	file_info = g_file_query_info (file,
660 	                               G_FILE_ATTRIBUTE_STANDARD_IS_HIDDEN,
661 	                               G_FILE_QUERY_INFO_NOFOLLOW_SYMLINKS,
662 	                               NULL, NULL);
663 	if (file_info) {
664 		/* Check if GIO says the file is hidden */
665 		is_hidden = g_file_info_get_is_hidden (file_info);
666 		g_object_unref (file_info);
667 	} else {
668 		gchar *basename;
669 
670 		/* Resort last to basename checks, this might happen on
671 		 * already deleted files.
672 		 */
673 		basename = g_file_get_basename (file);
674 		is_hidden = basename[0] == '.';
675 		g_free (basename);
676 	}
677 
678 	return is_hidden;
679 }
680 
681 gint
tracker_file_cmp(GFile * file_a,GFile * file_b)682 tracker_file_cmp (GFile *file_a,
683                   GFile *file_b)
684 {
685 	/* Returns 0 if files are equal.
686 	 * Useful to be used in g_list_find_custom() or g_queue_find_custom() */
687 	return !g_file_equal (file_a, file_b);
688 }
689 
690 /**
691  * tracker_filename_casecmp_without_extension:
692  * @a: a string containing a file name
693  * @b: filename to be compared with @a
694  *
695  * This function performs a case-insensitive comparison of @a and @b.
696  * Additionally, text beyond the last '.' in a string is not considered
697  * part of the match, so for example given the inputs "file.mp3" and
698  * "file.wav" this function will return %TRUE.
699  *
700  * Internally, the g_ascii_tolower() function is used - this means that
701  * @a and @b must be in an encoding in which ASCII characters always
702  * represent themselves, such as UTF-8 or the ISO-8859-* charsets.
703  *
704  * Returns: %TRUE if the two file names match.
705  **/
706 gboolean
tracker_filename_casecmp_without_extension(const gchar * a,const gchar * b)707 tracker_filename_casecmp_without_extension (const gchar *a,
708                                             const gchar *b)
709 {
710 	gchar *pa;
711 	gchar *pb;
712 	gint len_a;
713 	gint len_b;
714 
715 	g_return_val_if_fail (a != NULL, FALSE);
716 	g_return_val_if_fail (b != NULL, FALSE);
717 
718 	pa = strrchr (a, '.');
719 	pb = strrchr (b, '.');
720 
721 	/* Did we find a "." */
722 	if (pa) {
723 		len_a = pa - a;
724 	} else {
725 		len_a = -1;
726 	}
727 
728 	if (pb) {
729 		len_b = pb - b;
730 	} else {
731 		len_b = -1;
732 	}
733 
734 	/* If one has a "." and the other doesn't, we do length
735 	 * comparison with strlen() which is less optimal but this is
736 	 * not a case we consider common operation.
737 	 */
738 	if (len_a == -1 && len_b > -1) {
739 		len_a = strlen (a);
740 	} else if (len_b == -1 && len_a > -1) {
741 		len_b = strlen (b);
742 	}
743 
744 	/* If we have length for both and it's different then these
745 	 * strings are not the same. If we have no length for the
746 	 * strings then it's a simple -1 != -1 comparison.
747 	 */
748 	if (len_a != len_b) {
749 		return FALSE;
750 	}
751 
752 	/* Now we know we either have the same length string or no
753 	 * extension in a and b, meaning it's a strcmp() of the
754 	 * string only. We test only len_a or len_b here for that:
755 	 */
756 	if (G_UNLIKELY (len_a == -1)) {
757 		return g_ascii_strcasecmp (a, b) == 0;
758 	}
759 
760 	return g_ascii_strncasecmp (a, b, len_a) == 0;
761 }
762