1 /**************************************************************************
2  Copyright:
3       (C) 2008 - 2013  Alexander Shaduri <ashaduri 'at' gmail.com>
4  License: See LICENSE_zlib.txt file
5 ***************************************************************************/
6 /// \file
7 /// \author Alexander Shaduri
8 /// \ingroup hz
9 /// \weakgroup hz
10 /// @{
11 
12 #ifndef HZ_FS_PATH_UTILS_H
13 #define HZ_FS_PATH_UTILS_H
14 
15 #include "hz_config.h"  // feature macros
16 
17 #include <string>
18 
19 #if !defined _WIN32
20 	#include <libgen.h>  // dirname, basename
21 	#include <cstring>  // strncpy
22 	#include <cstddef>  // std::size_t
23 #endif
24 
25 #include "fs_common.h"  // separator
26 
27 
28 /**
29 \file
30 Filesystem path string manipulation.
31 For windows, always supply utf-8 or current locale-encoded strings.
32 Paths like \\.\ and \\?\ are not supported for windows (yet).
33 */
34 
35 namespace hz {
36 
37 
38 
39 /// Convert path from unknown format to native (e.g. unix paths to win32).
40 /// Same as FsPath(path).to_native().str()
41 inline std::string path_to_native(const std::string& path);
42 
43 /// Remove trailing slashes in path (unless they are part of root component).
44 inline std::string path_trim_trailing_separators(const std::string& path);
45 
46 /// Check if the path is absolute (only for native paths). Return 0 if it's not.
47 /// The returned value is a position past the root component (e.g. 3 for C:\\temp).
48 inline std::string::size_type path_is_absolute(const std::string& path);
49 
50 
51 /// Get the path truncated by 1 level, e.g. /usr/local/ -> /usr.
52 inline std::string path_get_dirname(const std::string& path);
53 
54 /// Get the basename of path, e.g. /usr/local/ -> local; /a/b -> b.
55 inline std::string path_get_basename(const std::string& path);
56 
57 /// Get root path of current path. e.g. '/' or 'D:\'.
58 /// May not work with relative paths under win32.
59 inline std::string path_get_root(const std::string& path);
60 
61 
62 /// Change the supplied filename so that it's safe to create it
63 /// (remove any potentially harmful characters from it).
64 inline std::string filename_make_safe(const std::string& filename);
65 
66 /// Change the supplied path so that it's safe to create it
67 /// (remove any potentially harmful characters from it).
68 inline std::string path_make_safe(const std::string& path);
69 
70 
71 
72 
73 
74 // ------------------------------------------- Implementation
75 
76 
77 
path_to_native(const std::string & path)78 inline std::string path_to_native(const std::string& path)
79 {
80 	std::string s(path);
81 	std::string::size_type pos = 0;
82 
83 	char from = '\\';
84 	if (DIR_SEPARATOR == '\\')
85 		from = '/';
86 
87 	while ((pos = s.find(from, pos)) != std::string::npos) {
88 		s[pos] = DIR_SEPARATOR;
89 		++pos;
90 	}
91 
92 	return s;
93 }
94 
95 
96 
path_trim_trailing_separators(const std::string & path)97 inline std::string path_trim_trailing_separators(const std::string& path)
98 {
99 	std::string::size_type apos = path_is_absolute(path);  // first position of non-abs portion
100 	if (apos >= path.size())  // / or similar
101 		return path;
102 
103 	std::string::size_type pos = path.find_last_not_of(DIR_SEPARATOR);  // remove trailing slashes
104 	if (pos == std::string::npos)  // / ?
105 		return path.substr(0, apos);
106 
107 	return path.substr(0, pos + 1);
108 }
109 
110 
111 
// Return the length of the root component of path, or 0 if path is not
// absolute.
// Non-Windows builds: 1 when the path starts with '/'.
// Windows builds: 3 for the "X:\" form; for paths that start with two
// backslashes, the position just past the last backslash in the string
// (provided that backslash is at index 3 or later).
inline std::string::size_type path_is_absolute(const std::string& path)
{
#ifdef _WIN32
	// Drive form: any character followed by ":\", e.g. 'D:\'.
	if (path.size() >= 3 && path.compare(1, 2, ":\\") == 0)
		return 3;

	// Double-backslash form, e.g. '\\host\'.
	if (path.size() >= 4 && path.compare(0, 2, "\\\\") == 0) {
		const std::string::size_type last_sep = path.rfind('\\');
		if (last_sep != std::string::npos && last_sep >= 3)
			return last_sep + 1;
	}

#else
	const bool starts_at_root = !path.empty() && path[0] == '/';
	if (starts_at_root)
		return 1;
#endif

	return 0;  // relative (or unrecognised) path
}
131 
132 
133 
// Return path with its last component removed, e.g. /usr/local/ -> /usr.
// Non-Windows builds delegate to dirname() from <libgen.h>; Windows builds
// use a hand-written equivalent based on DIR_SEPARATOR and path_is_absolute().
inline std::string path_get_dirname(const std::string& path)
{
// GLib (as of 2.14.1) has a bug in g_path_get_dirname() implementation:
// "/usr/local/" returns "/usr/local", not "/usr". Don't use it.

#if !defined _WIN32
	// dirname() may modify its argument, so hand it a private copy. The copy
	// is owned by a std::string (and includes the terminating NUL taken from
	// c_str()), so it is released even if building the result throws.
	std::string buf(path.c_str(), path.size() + 1);
	return std::string(dirname(&buf[0]));

#else
	if (path.empty())
		return ".";

	const std::string::size_type apos = path_is_absolute(path);  // first position of non-root portion
	if (apos >= path.size())  // root only
		return path;

	const std::string::size_type pos2 = path.find_last_not_of(DIR_SEPARATOR);  // skip trailing separators
	// Only separators follow the root (the last non-separator character is
	// missing or lies inside the root, e.g. "C:\\\\"): the answer is the root.
	if (pos2 == std::string::npos || pos2 < apos)
		return path.substr(0, apos);

	std::string::size_type pos1 = path.find_last_of(DIR_SEPARATOR, pos2);  // separator before the last component
	if (pos1 == std::string::npos) {
		return ".";  // one-component relative path
	}
	if (apos && pos1 == apos - 1) {  // last component sits directly under the root
		return path.substr(0, apos);
	}

	pos1 = path.find_last_not_of(DIR_SEPARATOR, pos1);  // skip duplicate separators
	// Nothing but separators between the root and the last component
	// (e.g. "C:\\\\foo"): the parent is the root itself.
	if (apos && (pos1 == std::string::npos || pos1 < apos))
		return path.substr(0, apos);

	// For pos1 == npos, pos1 + 1 wraps to 0 and yields an empty string.
	const std::string dir = path.substr(0, pos1 + 1);
	if (dir.empty())
		return ".";

	return dir;
#endif
}
178 
179 
180 
// Return the last component of path, e.g. /usr/local/ -> local; /a/b -> b.
// Non-Windows builds delegate to basename() from <libgen.h>; Windows builds
// use a hand-written equivalent based on DIR_SEPARATOR and path_is_absolute().
inline std::string path_get_basename(const std::string& path)
{
#if !defined _WIN32
	// basename() may modify its argument, so hand it a private copy. The copy
	// is owned by a std::string (and includes the terminating NUL taken from
	// c_str()), so it is released even if building the result throws.
	std::string buf(path.c_str(), path.size() + 1);
	return std::string(basename(&buf[0]));

#else
	if (path.empty())
		return ".";

	const std::string::size_type apos = path_is_absolute(path);  // first position of non-root portion
	if (apos >= path.size())  // root only
		return path;  // / -> /, as per basename manpage

	std::string::size_type pos2 = path.find_last_not_of(DIR_SEPARATOR);  // skip trailing separators
	// The last non-separator character lies inside the root (e.g. "C:\\\\"):
	// the path is just the root plus extra separators, so return the root
	// rather than a slice of it.
	if (pos2 != std::string::npos && pos2 < apos)
		return path.substr(0, apos);

	std::string::size_type pos1 = path.find_last_of(DIR_SEPARATOR, pos2);
	pos1 = (pos1 == std::string::npos ? 0 : (pos1 + 1));  // start of the last component
	pos2 = (pos2 == std::string::npos ? path.size() : (pos2 + 1));  // one past its end

	return path.substr(pos1, pos2 - pos1);
#endif
}
207 
208 
209 
// Return the root component of path.
// Non-Windows builds: always "/", whatever the argument is.
// Windows builds: the first three characters for the "X:\" form; for paths
// that start with two backslashes, everything up to and including the last
// backslash (provided it is at index 3 or later); otherwise an empty string.
inline std::string path_get_root(const std::string& path)
{
#if defined _WIN32
	// The detection is spelled out here rather than calling path_is_absolute().
	const std::string::size_type len = path.size();

	if (len >= 3 && path[1] == ':' && path[2] == '\\')  // 'D:\'
		return path.substr(0, 3);

	if (len >= 4 && path[0] == '\\' && path[1] == '\\') {  // '\\host\', '\\.\', '\\?\'
		const std::string::size_type last_sep = path.rfind('\\');
		if (last_sep != std::string::npos && last_sep >= 3)
			return path.substr(0, last_sep + 1);
	}

	return std::string();  // no recognisable root

#else
	return "/";
#endif
}
228 
229 
230 
path_compress(const std::string & path)231 inline std::string path_compress(const std::string& path)
232 {
233 	std::string::size_type rel_pos = path_is_absolute(path);
234 	std::string rel = path.substr(rel_pos);  // retrieve relative component only
235 
236 	std::string::size_type curr = 0, last = 0;
237 	std::string::size_type end = rel.size();
238 	std::string result, component;
239 
240 	while (true) {
241 		if (last >= end)  // last is past the end
242 			break;
243 
244 		curr = rel.find(DIR_SEPARATOR, last);
245 		if (curr != last) {
246 			component = rel.substr(last, (curr == std::string::npos ? curr : (curr - last)));
247 
248 			if (component == ".") {
249 				if (result == "" && rel_pos == 0)  // don't un-relativise
250 					result += (std::string(".") + DIR_SEPARATOR_S);
251 				// else, nothing
252 
253 			} else if (component == "..") {
254 				// retain ".." when previous component is ".." or ".".
255 				if (result == "" || result == (std::string(".") + DIR_SEPARATOR_S)
256 						|| (result.size() >= 3 && result.substr(result.size() - 3) == (std::string("..") + DIR_SEPARATOR_S))) {
257 					result += (std::string("..") + DIR_SEPARATOR_S);
258 
259 				} else {
260 					// std::cerr << "Getting dirname on \"" << result << "\"\n";
261 					std::string up = path_get_dirname(result);  // go up
262 					if (up == ".") {
263 						result = "";
264 					} else {
265 						result = (up + DIR_SEPARATOR_S);
266 					}
267 				}
268 
269 			} else {
270 				result += (component + DIR_SEPARATOR_S);
271 			}
272 		}
273 
274 		if (curr == std::string::npos)
275 			break;
276 		last = curr + 1;
277 	}
278 
279 	return path_trim_trailing_separators(path.substr(0, rel_pos) + result);
280 }
281 
282 
283 
// Return a copy of filename in which every character other than ASCII
// letters, digits, '.', '_' and '-' is replaced by '_'. A final '.' or ' '
// is replaced by '_' as well.
inline std::string filename_make_safe(const std::string& filename)
{
	const std::string allowed(
			"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890._-");

	std::string safe(filename);
	for (std::string::size_type i = 0; i < safe.size(); ++i) {
		if (allowed.find(safe[i]) == std::string::npos)
			safe[i] = '_';
	}

	// win32 has trouble with names that end in a space or a dot
	if (!safe.empty()) {
		char& last = safe[safe.size() - 1];
		if (last == '.' || last == ' ')
			last = '_';
	}

	return safe;
}
300 
301 
302 
path_make_safe(const std::string & path)303 inline std::string path_make_safe(const std::string& path)
304 {
305 	std::string s(path);
306 	std::string::size_type pos = 0;
307 	while ((pos = s.find_first_not_of(
308 			"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890._-",
309 			pos)) != std::string::npos) {
310 		if (s[pos] != DIR_SEPARATOR)
311 			s[pos] = '_';
312 		++pos;
313 	}
314 	// win32 kernel (heh) has trouble with space and dot-ending files
315 	if (!s.empty() && (s[s.size() - 1] == '.' || s[s.size() - 1] == ' ')) {
316 		s[s.size() - 1] = '_';
317 	}
318 	return s;
319 }
320 
321 
322 
323 
324 
325 }  // ns hz
326 
327 
328 
329 #endif
330 
331 /// @}
332