1 /*
2  * Copyright (C) the libgit2 contributors. All rights reserved.
3  *
4  * This file is part of libgit2, distributed under the GNU GPL v2 with
5  * a Linking Exception. For full terms see the included COPYING file.
6  */
7 #ifndef INCLUDE_path_h__
8 #define INCLUDE_path_h__
9 
10 #include "common.h"
11 
12 #include "posix.h"
13 #include "buffer.h"
14 #include "vector.h"
15 
16 #include "git2/sys/path.h"
17 
18 /**
19  * Path manipulation utils
20  *
21  * These are path utilities that munge paths without actually
22  * looking at the real filesystem.
23  */
24 
25 /*
26  * The dirname() function shall take a pointer to a character string
27  * that contains a pathname, and return a pointer to a string that is a
28  * pathname of the parent directory of that file. Trailing '/' characters
29  * in the path are not counted as part of the path.
30  *
31  * If path does not contain a '/', then dirname() shall return a pointer to
32  * the string ".". If path is a null pointer or points to an empty string,
33  * dirname() shall return a pointer to the string "." .
34  *
35  * The `git_path_dirname` implementation is thread safe. The returned
36  * string must be manually free'd.
37  *
38  * The `git_path_dirname_r` implementation writes the dirname to a `git_buf`
39  * if the buffer pointer is not NULL.
40  * It returns an error code < 0 if there is an allocation error, otherwise
41  * the length of the dirname (which will be > 0).
42  */
43 extern char *git_path_dirname(const char *path);
44 extern int git_path_dirname_r(git_buf *buffer, const char *path);
45 
46 /*
47  * This function returns the basename of the file, which is the last
48  * part of its full name given by fname, with the drive letter and
49  * leading directories stripped off. For example, the basename of
50  * c:/foo/bar/file.ext is file.ext, and the basename of a:foo is foo.
51  *
52  * Trailing slashes and backslashes are significant: the basename of
53  * c:/foo/bar/ is an empty string after the rightmost slash.
54  *
55  * The `git_path_basename` implementation is thread safe. The returned
56  * string must be manually free'd.
57  *
58  * The `git_path_basename_r` implementation writes the basename to a `git_buf`.
59  * It returns an error code < 0 if there is an allocation error, otherwise
60  * the length of the basename (which will be >= 0).
61  */
62 extern char *git_path_basename(const char *path);
63 extern int git_path_basename_r(git_buf *buffer, const char *path);
64 
65 /* Return the offset of the start of the basename.  Unlike the other
66  * basename functions, this returns 0 if the path is empty.
67  */
68 extern size_t git_path_basename_offset(git_buf *buffer);
69 
70 /**
71  * Find offset to root of path if path has one.
72  *
73  * This will return a number >= 0 which is the offset to the start of the
74  * path, if the path is rooted (i.e. "/rooted/path" returns 0 and
75  * "c:/windows/rooted/path" returns 2).  If the path is not rooted, this
76  * returns -1.
77  */
78 extern int git_path_root(const char *path);
79 
80 /**
81  * Ensure path has a trailing '/'.
82  */
83 extern int git_path_to_dir(git_buf *path);
84 
85 /**
86  * Ensure string has a trailing '/' if there is space for it.
87  */
88 extern void git_path_string_to_dir(char* path, size_t size);
89 
90 /**
91  * Taken from git.git; returns nonzero if the given path is "." or "..".
92  */
git_path_is_dot_or_dotdot(const char * name)93 GIT_INLINE(int) git_path_is_dot_or_dotdot(const char *name)
94 {
95 	return (name[0] == '.' &&
96 			  (name[1] == '\0' ||
97 				(name[1] == '.' && name[2] == '\0')));
98 }
99 
100 #ifdef GIT_WIN32
git_path_is_dot_or_dotdotW(const wchar_t * name)101 GIT_INLINE(int) git_path_is_dot_or_dotdotW(const wchar_t *name)
102 {
103 	return (name[0] == L'.' &&
104 			  (name[1] == L'\0' ||
105 				(name[1] == L'.' && name[2] == L'\0')));
106 }
107 
/*
 * Win32 flavor: `p` is treated as absolute when its first character is
 * accepted by git__isalpha() (presumably a drive letter -- confirm against
 * git__isalpha's definition), its second is ':' and its third is either a
 * backslash or a forward slash.  `p` is evaluated several times, so do not
 * pass an expression with side effects.
 */
#define git_path_is_absolute(p) \
	(git__isalpha((p)[0]) && (p)[1] == ':' && ((p)[2] == '\\' || (p)[2] == '/'))

/*
 * Win32 flavor: `p` is a single character; both the forward slash and the
 * backslash count as directory separators.  `p` is evaluated twice.
 */
#define git_path_is_dirsep(p) \
	((p) == '/' || (p) == '\\')
113 
114 /**
115  * Convert backslashes in path to forward slashes.
116  */
git_path_mkposix(char * path)117 GIT_INLINE(void) git_path_mkposix(char *path)
118 {
119 	while (*path) {
120 		if (*path == '\\')
121 			*path = '/';
122 
123 		path++;
124 	}
125 }
126 #else
/*
 * Non-Win32 flavor: git_path_mkposix() expands to nothing, so no backslash
 * conversion is performed and the argument is never evaluated.
 */
#	define git_path_mkposix(p) /* blank */

/* Non-Win32 flavor: `p` is absolute when its first character is '/'. */
#define git_path_is_absolute(p) \
	((p)[0] == '/')

/* Non-Win32 flavor: `p` is a single character; only '/' is a separator. */
#define git_path_is_dirsep(p) \
	((p) == '/')
134 
135 #endif
136 
137 /**
138  * Check if string is a relative path (i.e. starts with "./" or "../")
139  */
git_path_is_relative(const char * p)140 GIT_INLINE(int) git_path_is_relative(const char *p)
141 {
142 	return (p[0] == '.' && (p[1] == '/' || (p[1] == '.' && p[2] == '/')));
143 }
144 
145 /**
146  * Check if string is at end of path segment (i.e. looking at '/' or '\0')
147  */
git_path_at_end_of_segment(const char * p)148 GIT_INLINE(int) git_path_at_end_of_segment(const char *p)
149 {
150 	return !*p || *p == '/';
151 }
152 
153 extern int git__percent_decode(git_buf *decoded_out, const char *input);
154 
155 /**
156  * Extract path from file:// URL.
157  */
158 extern int git_path_fromurl(git_buf *local_path_out, const char *file_url);
159 
160 
161 /**
162  * Path filesystem utils
163  *
164  * These are path utilities that actually access the filesystem.
165  */
166 
167 /**
168  * Check if a file exists and can be accessed.
169  * @return true or false
170  */
171 extern bool git_path_exists(const char *path);
172 
173 /**
174  * Check if the given path points to a directory.
175  * @return true or false
176  */
177 extern bool git_path_isdir(const char *path);
178 
179 /**
180  * Check if the given path points to a regular file.
181  * @return true or false
182  */
183 extern bool git_path_isfile(const char *path);
184 
185 /**
186  * Check if the given path points to a symbolic link.
187  * @return true or false
188  */
189 extern bool git_path_islink(const char *path);
190 
191 /**
192  * Check if the given path is a directory, and is empty.
193  */
194 extern bool git_path_is_empty_dir(const char *path);
195 
196 /**
197  * Stat a file and/or link and set error if needed.
198  */
199 extern int git_path_lstat(const char *path, struct stat *st);
200 
201 /**
202  * Check if the parent directory contains the item.
203  *
204  * @param dir Directory to check.
205  * @param item Item that might be in the directory.
 * @return true if item exists in directory, false otherwise.
207  */
208 extern bool git_path_contains(git_buf *dir, const char *item);
209 
210 /**
211  * Check if the given path contains the given subdirectory.
212  *
213  * @param parent Directory path that might contain subdir
214  * @param subdir Subdirectory name to look for in parent
215  * @return true if subdirectory exists, false otherwise.
216  */
217 extern bool git_path_contains_dir(git_buf *parent, const char *subdir);
218 
219 /**
220  * Determine the common directory length between two paths, including
 * the final path separator.  For example, given paths 'a/b/c/1.txt'
222  * and 'a/b/c/d/2.txt', the common directory is 'a/b/c/', and this
223  * will return the length of the string 'a/b/c/', which is 6.
224  *
225  * @param one The first path
226  * @param two The second path
227  * @return The length of the common directory
228  */
229 extern size_t git_path_common_dirlen(const char *one, const char *two);
230 
231 /**
232  * Make the path relative to the given parent path.
233  *
234  * @param path The path to make relative
235  * @param parent The parent path to make path relative to
 * @return 0 if path was made relative, GIT_ENOTFOUND
 *         if there was no common root between the paths,
 *         or another value <0 on error.
239  */
240 extern int git_path_make_relative(git_buf *path, const char *parent);
241 
242 /**
243  * Check if the given path contains the given file.
244  *
245  * @param dir Directory path that might contain file
 * @param file File name to look for in dir
247  * @return true if file exists, false otherwise.
248  */
249 extern bool git_path_contains_file(git_buf *dir, const char *file);
250 
251 /**
252  * Prepend base to unrooted path or just copy path over.
253  *
254  * This will optionally return the index into the path where the "root"
255  * is, either the end of the base directory prefix or the path root.
256  */
257 extern int git_path_join_unrooted(
258 	git_buf *path_out, const char *path, const char *base, ssize_t *root_at);
259 
260 /**
261  * Removes multiple occurrences of '/' in a row, squashing them into a
262  * single '/'.
263  */
264 extern void git_path_squash_slashes(git_buf *path);
265 
266 /**
267  * Clean up path, prepending base if it is not already rooted.
268  */
269 extern int git_path_prettify(git_buf *path_out, const char *path, const char *base);
270 
271 /**
272  * Clean up path, prepending base if it is not already rooted and
273  * appending a slash.
274  */
275 extern int git_path_prettify_dir(git_buf *path_out, const char *path, const char *base);
276 
277 /**
278  * Get a directory from a path.
279  *
280  * If path is a directory, this acts like `git_path_prettify_dir`
281  * (cleaning up path and appending a '/').  If path is a normal file,
 * this prettifies it, then removes the filename a la dirname and
283  * appends the trailing '/'.  If the path does not exist, it is
284  * treated like a regular filename.
285  */
286 extern int git_path_find_dir(git_buf *dir, const char *path, const char *base);
287 
288 /**
289  * Resolve relative references within a path.
290  *
291  * This eliminates "./" and "../" relative references inside a path,
292  * as well as condensing multiple slashes into single ones.  It will
293  * not touch the path before the "ceiling" length.
294  *
 * Additionally, this will recognize a "c:/" drive prefix or a "xyz://" URL
296  * prefix and not touch that part of the path.
297  */
298 extern int git_path_resolve_relative(git_buf *path, size_t ceiling);
299 
300 /**
301  * Apply a relative path to base path.
302  *
303  * Note that the base path could be a filename or a URL and this
304  * should still work.  The relative path is walked segment by segment
305  * with three rules: series of slashes will be condensed to a single
306  * slash, "." will be eaten with no change, and ".." will remove a
307  * segment from the base path.
308  */
309 extern int git_path_apply_relative(git_buf *target, const char *relpath);
310 
/*
 * Bit flags for the directory-reading functions in this header whose
 * `flags` parameter is documented as a "combination of GIT_PATH_DIR flags".
 * Each constant occupies its own bit, so they can be OR'ed together.
 *
 * NOTE(review): the per-flag descriptions below are inferred from the
 * constant names only -- confirm against the implementation.
 */
enum {
	/* presumably: treat entry names case-insensitively */
	GIT_PATH_DIR_IGNORE_CASE = (1u << 0),
	/* presumably: convert decomposed UTF-8 entry names to precomposed form */
	GIT_PATH_DIR_PRECOMPOSE_UNICODE = (1u << 1),
	/* presumably: also report the "." and ".." entries */
	GIT_PATH_DIR_INCLUDE_DOT_AND_DOTDOT = (1u << 2),
};
316 
317 /**
 * Walk each directory entry, except '.' and '..', invoking callback(payload, path).
319  *
320  * @param pathbuf Buffer the function reads the initial directory
321  * 		path from, and updates with each successive entry's name.
322  * @param flags Combination of GIT_PATH_DIR flags.
323  * @param callback Callback for each entry. Passed the `payload` and each
324  *		successive path inside the directory as a full path.  This may
325  *		safely append text to the pathbuf if needed.  Return non-zero to
326  *		cancel iteration (and return value will be propagated back).
327  * @param payload Passed to callback as first argument.
328  * @return 0 on success or error code from OS error or from callback
329  */
330 extern int git_path_direach(
331 	git_buf *pathbuf,
332 	uint32_t flags,
333 	int (*callback)(void *payload, git_buf *path),
334 	void *payload);
335 
336 /**
337  * Sort function to order two paths
338  */
339 extern int git_path_cmp(
340 	const char *name1, size_t len1, int isdir1,
341 	const char *name2, size_t len2, int isdir2,
342 	int (*compare)(const char *, const char *, size_t));
343 
344 /**
345  * Invoke callback up path directory by directory until the ceiling is
346  * reached (inclusive of a final call at the root_path).
347  *
348  * Returning anything other than 0 from the callback function
349  * will stop the iteration and propagate the error to the caller.
350  *
 * @param pathbuf Buffer the function reads the directory from
 *		and updates with each successive name.
353  * @param ceiling Prefix of path at which to stop walking up.  If NULL,
354  *		this will walk all the way up to the root.  If not a prefix of
355  *		pathbuf, the callback will be invoked a single time on the
356  *		original input path.
357  * @param callback Function to invoke on each path.  Passed the `payload`
358  *		and the buffer containing the current path.  The path should not
359  *		be modified in any way. Return non-zero to stop iteration.
 * @param payload Passed to callback as the first argument.
361  */
362 extern int git_path_walk_up(
363 	git_buf *pathbuf,
364 	const char *ceiling,
365 	int (*callback)(void *payload, const char *path),
366 	void *payload);
367 
368 
369 enum { GIT_PATH_NOTEQUAL = 0, GIT_PATH_EQUAL = 1, GIT_PATH_PREFIX = 2 };
370 
371 /*
372  * Determines if a path is equal to or potentially a child of another.
373  * @param parent The possible parent
374  * @param child The possible child
375  */
git_path_equal_or_prefixed(const char * parent,const char * child,ssize_t * prefixlen)376 GIT_INLINE(int) git_path_equal_or_prefixed(
377 	const char *parent,
378 	const char *child,
379 	ssize_t *prefixlen)
380 {
381 	const char *p = parent, *c = child;
382 	int lastslash = 0;
383 
384 	while (*p && *c) {
385 		lastslash = (*p == '/');
386 
387 		if (*p++ != *c++)
388 			return GIT_PATH_NOTEQUAL;
389 	}
390 
391 	if (*p != '\0')
392 		return GIT_PATH_NOTEQUAL;
393 
394 	if (*c == '\0') {
395 		if (prefixlen)
396 			*prefixlen = p - parent;
397 
398 		return GIT_PATH_EQUAL;
399 	}
400 
401 	if (*c == '/' || lastslash) {
402 		if (prefixlen)
403 			*prefixlen = (p - parent) - lastslash;
404 
405 		return GIT_PATH_PREFIX;
406 	}
407 
408 	return GIT_PATH_NOTEQUAL;
409 }
410 
411 /* translate errno to libgit2 error code and set error message */
412 extern int git_path_set_error(
413 	int errno_value, const char *path, const char *action);
414 
415 /* check if non-ascii characters are present in filename */
416 extern bool git_path_has_non_ascii(const char *path, size_t pathlen);
417 
/*
 * Encoding names for path conversion.
 * NOTE(review): presumably these are the iconv encoding names for paths as
 * stored in the repository and as reported by the local filesystem --
 * confirm against the iconv setup code.
 */
#define GIT_PATH_REPO_ENCODING "UTF-8"

/* The native encoding differs from the repository encoding only on Apple platforms. */
#ifdef __APPLE__
#define GIT_PATH_NATIVE_ENCODING "UTF-8-MAC"
#else
#define GIT_PATH_NATIVE_ENCODING "UTF-8"
#endif
425 
426 #ifdef GIT_USE_ICONV
427 
428 #include <iconv.h>
429 
/* State used by the git_path_iconv*() functions declared below. */
typedef struct {
	iconv_t map;	/* iconv conversion descriptor */
	git_buf buf;	/* presumably the internal buffer holding rewritten paths -- confirm */
} git_path_iconv_t;

/* Static initializer: descriptor set to (iconv_t)-1, buffer to GIT_BUF_INIT. */
#define GIT_PATH_ICONV_INIT { (iconv_t)-1, GIT_BUF_INIT }
436 
437 /* Init iconv data for converting decomposed UTF-8 to precomposed */
438 extern int git_path_iconv_init_precompose(git_path_iconv_t *ic);
439 
440 /* Clear allocated iconv data */
441 extern void git_path_iconv_clear(git_path_iconv_t *ic);
442 
443 /*
 * Rewrite `in` buffer using iconv map if necessary, replacing the `in`
 * pointer with the internal iconv buffer if a rewrite happened.  The `in`
 * pointer will be left unchanged if no rewrite was needed.
447  */
448 extern int git_path_iconv(git_path_iconv_t *ic, const char **in, size_t *inlen);
449 
450 #endif /* GIT_USE_ICONV */
451 
452 extern bool git_path_does_fs_decompose_unicode(const char *root);
453 
454 
455 typedef struct git_path_diriter git_path_diriter;
456 
457 #if defined(GIT_WIN32) && !defined(__MINGW32__)
458 
/*
 * Directory iterator state, Win32 (non-MinGW) flavor.
 * NOTE(review): the field descriptions are inferred from names and types
 * only -- confirm against the implementation.
 */
struct git_path_diriter
{
	git_win32_path path;	/* presumably the current path in Win32 form */
	size_t parent_len;	/* presumably the length of the directory part of `path` */

	git_buf path_utf8;	/* presumably the UTF-8 form of the current path */
	size_t parent_utf8_len;	/* presumably the length of the directory part of `path_utf8` */

	HANDLE handle;	/* Win32 handle; INVALID_HANDLE_VALUE in GIT_PATH_DIRITER_INIT */

	unsigned int flags;

	WIN32_FIND_DATAW current;	/* presumably the most recently read entry */
	unsigned int needs_next;
};

/*
 * Static initializer.  Only the first five members are listed (path,
 * parent_len, path_utf8, parent_utf8_len, handle); under C initializer
 * rules the remaining members are zero-initialized.
 */
#define GIT_PATH_DIRITER_INIT { {0}, 0, GIT_BUF_INIT, 0, INVALID_HANDLE_VALUE }
476 
477 #else
478 
/*
 * Directory iterator state, flavor used when the Win32 (non-MinGW)
 * variant is not selected.
 * NOTE(review): the field descriptions are inferred from names and types
 * only -- confirm against the implementation.
 */
struct git_path_diriter
{
	git_buf path;	/* presumably the current entry's path */
	size_t parent_len;	/* presumably the length of the directory part of `path` */

	unsigned int flags;

	DIR *dir;	/* directory stream */

#ifdef GIT_USE_ICONV
	git_path_iconv_t ic;	/* iconv state, only present when built with GIT_USE_ICONV */
#endif
};

/*
 * Static initializer.  Only `path` is listed; under C initializer rules
 * the remaining members are zero-initialized.
 */
#define GIT_PATH_DIRITER_INIT { GIT_BUF_INIT }
494 
495 #endif
496 
497 /**
498  * Initialize a directory iterator.
499  *
500  * @param diriter Pointer to a diriter structure that will be setup.
501  * @param path The path that will be iterated over
502  * @param flags Directory reader flags
503  * @return 0 or an error code
504  */
505 extern int git_path_diriter_init(
506 	git_path_diriter *diriter,
507 	const char *path,
508 	unsigned int flags);
509 
510 /**
511  * Advance the directory iterator.  Will return GIT_ITEROVER when
512  * the iteration has completed successfully.
513  *
514  * @param diriter The directory iterator
515  * @return 0, GIT_ITEROVER, or an error code
516  */
517 extern int git_path_diriter_next(git_path_diriter *diriter);
518 
519 /**
520  * Returns the file name of the current item in the iterator.
521  *
522  * @param out Pointer to store the path in
523  * @param out_len Pointer to store the length of the path in
524  * @param diriter The directory iterator
525  * @return 0 or an error code
526  */
527 extern int git_path_diriter_filename(
528 	const char **out,
529 	size_t *out_len,
530 	git_path_diriter *diriter);
531 
532 /**
533  * Returns the full path of the current item in the iterator; that
534  * is the current filename plus the path of the directory that the
535  * iterator was constructed with.
536  *
537  * @param out Pointer to store the path in
538  * @param out_len Pointer to store the length of the path in
539  * @param diriter The directory iterator
540  * @return 0 or an error code
541  */
542 extern int git_path_diriter_fullpath(
543 	const char **out,
544 	size_t *out_len,
545 	git_path_diriter *diriter);
546 
547 /**
548  * Performs an `lstat` on the current item in the iterator.
549  *
550  * @param out Pointer to store the stat data in
551  * @param diriter The directory iterator
552  * @return 0 or an error code
553  */
554 extern int git_path_diriter_stat(struct stat *out, git_path_diriter *diriter);
555 
556 /**
557  * Closes the directory iterator.
558  *
559  * @param diriter The directory iterator
560  */
561 extern void git_path_diriter_free(git_path_diriter *diriter);
562 
563 /**
564  * Load all directory entries (except '.' and '..') into a vector.
565  *
566  * For cases where `git_path_direach()` is not appropriate, this
567  * allows you to load the filenames in a directory into a vector
568  * of strings. That vector can then be sorted, iterated, or whatever.
 * Remember to free all of the allocated strings when you are done.
570  *
571  * @param contents Vector to fill with directory entry names.
572  * @param path The directory to read from.
573  * @param prefix_len When inserting entries, the trailing part of path
574  * 		will be prefixed after this length.  I.e. given path "/a/b" and
575  * 		prefix_len 3, the entries will look like "b/e1", "b/e2", etc.
576  * @param flags Combination of GIT_PATH_DIR flags.
577  */
578 extern int git_path_dirload(
579 	git_vector *contents,
580 	const char *path,
581 	size_t prefix_len,
582 	uint32_t flags);
583 
584 
585 /* Used for paths to repositories on the filesystem */
586 extern bool git_path_is_local_file_url(const char *file_url);
587 extern int git_path_from_url_or_path(git_buf *local_path_out, const char *url_or_path);
588 
/*
 * Flags to determine path validity in `git_path_isvalid`.
 * Each flag occupies its own bit so that flags can be OR'ed together.
 */
#define GIT_PATH_REJECT_TRAVERSAL          (1 << 0)
#define GIT_PATH_REJECT_DOT_GIT            (1 << 1)
#define GIT_PATH_REJECT_SLASH              (1 << 2)
#define GIT_PATH_REJECT_BACKSLASH          (1 << 3)
#define GIT_PATH_REJECT_TRAILING_DOT       (1 << 4)
#define GIT_PATH_REJECT_TRAILING_SPACE     (1 << 5)
#define GIT_PATH_REJECT_TRAILING_COLON     (1 << 6)
#define GIT_PATH_REJECT_DOS_PATHS          (1 << 7)
#define GIT_PATH_REJECT_NT_CHARS           (1 << 8)
#define GIT_PATH_REJECT_DOT_GIT_LITERAL    (1 << 9)
#define GIT_PATH_REJECT_DOT_GIT_HFS        (1 << 10)
#define GIT_PATH_REJECT_DOT_GIT_NTFS       (1 << 11)

/*
 * The composite masks below are fully parenthesized so that each one
 * expands to a single operand.  Without the parentheses an expression
 * such as `flags & GIT_PATH_REJECT_INDEX_DEFAULTS` would parse as
 * `(flags & GIT_PATH_REJECT_TRAVERSAL) | GIT_PATH_REJECT_DOT_GIT`,
 * because `&` binds more tightly than `|`.
 */

/* Default path safety for writing files to disk: since we use the
 * Win32 "File Namespace" APIs ("\\?\") we need to protect from
 * paths that the normal Win32 APIs would not write.
 */
#ifdef GIT_WIN32
# define GIT_PATH_REJECT_FILESYSTEM_DEFAULTS ( \
	GIT_PATH_REJECT_TRAVERSAL | \
	GIT_PATH_REJECT_BACKSLASH | \
	GIT_PATH_REJECT_TRAILING_DOT | \
	GIT_PATH_REJECT_TRAILING_SPACE | \
	GIT_PATH_REJECT_TRAILING_COLON | \
	GIT_PATH_REJECT_DOS_PATHS | \
	GIT_PATH_REJECT_NT_CHARS)
#else
# define GIT_PATH_REJECT_FILESYSTEM_DEFAULTS \
	(GIT_PATH_REJECT_TRAVERSAL)
#endif

/* Paths that should never be written into the working directory. */
#define GIT_PATH_REJECT_WORKDIR_DEFAULTS \
	(GIT_PATH_REJECT_FILESYSTEM_DEFAULTS | GIT_PATH_REJECT_DOT_GIT)

/* Paths that should never be written to the index. */
#define GIT_PATH_REJECT_INDEX_DEFAULTS \
	(GIT_PATH_REJECT_TRAVERSAL | GIT_PATH_REJECT_DOT_GIT)
628 
629 /*
630  * Determine whether a path is a valid git path or not - this must not contain
631  * a '.' or '..' component, or a component that is ".git" (in any case).
632  *
633  * `repo` is optional.  If specified, it will be used to determine the short
634  * path name to reject (if `GIT_PATH_REJECT_DOS_SHORTNAME` is specified),
635  * in addition to the default of "git~1".
636  */
637 extern bool git_path_isvalid(
638 	git_repository *repo,
639 	const char *path,
640 	uint16_t mode,
641 	unsigned int flags);
642 
643 /**
644  * Convert any backslashes into slashes
645  */
646 int git_path_normalize_slashes(git_buf *out, const char *path);
647 
648 bool git_path_supports_symlinks(const char *dir);
649 
650 /**
651  * Validate a system file's ownership
652  *
653  * Verify that the file in question is owned by an administrator or system
654  * account, or at least by the current user.
655  *
 * This function returns 0 if successful. If the file is not owned by any of
 * these, or if there have been any problems determining the file
 * ownership, it returns -1.
659  */
660 int git_path_validate_system_file_ownership(const char *path);
661 
662 #endif
663