1 /**************************************************************************
2 Copyright:
3 (C) 2008 - 2013 Alexander Shaduri <ashaduri 'at' gmail.com>
4 License: See LICENSE_zlib.txt file
5 ***************************************************************************/
6 /// \file
7 /// \author Alexander Shaduri
8 /// \ingroup hz
9 /// \weakgroup hz
10 /// @{
11
12 #ifndef HZ_FS_PATH_UTILS_H
13 #define HZ_FS_PATH_UTILS_H
14
15 #include "hz_config.h" // feature macros
16
17 #include <string>
18
19 #if !defined _WIN32
20 #include <libgen.h> // dirname, basename
21 #include <cstring> // strncpy
22 #include <cstddef> // std::size_t
23 #endif
24
25 #include "fs_common.h" // separator
26
27
28 /**
29 \file
30 Filesystem path string manipulation.
31 For windows, always supply utf-8 or current locale-encoded strings.
32 Paths like \\.\ and \\?\ are not supported for windows (yet).
33 */
34
35 namespace hz {
36
37
38
/// Convert path from unknown format to native (e.g. unix paths to win32).
/// Same as FsPath(path).to_native().str()
inline std::string path_to_native(const std::string& path);

/// Remove trailing slashes in path (unless they are part of root component).
inline std::string path_trim_trailing_separators(const std::string& path);

/// Check if the path is absolute (only for native paths). Return 0 if it's not.
/// The returned value is a position past the root component (e.g. 3 for C:\\temp).
inline std::string::size_type path_is_absolute(const std::string& path);


/// Get the path truncated by 1 level, e.g. /usr/local/ -> /usr.
inline std::string path_get_dirname(const std::string& path);

/// Get the basename of path, e.g. /usr/local/ -> local; /a/b -> b.
inline std::string path_get_basename(const std::string& path);

/// Get root path of current path. e.g. '/' or 'D:\'.
/// May not work with relative paths under win32.
inline std::string path_get_root(const std::string& path);

/// Compress a native path: skip empty components (duplicate separators),
/// drop "." components (a leading "." of a relative path is kept), resolve
/// ".." against the preceding component where there is one, and trim
/// trailing separators.
inline std::string path_compress(const std::string& path);


/// Change the supplied filename so that it's safe to create it
/// (remove any potentially harmful characters from it).
inline std::string filename_make_safe(const std::string& filename);

/// Change the supplied path so that it's safe to create it
/// (remove any potentially harmful characters from it).
inline std::string path_make_safe(const std::string& path);
69
70
71
72
73
74 // ------------------------------------------- Implementation
75
76
77
path_to_native(const std::string & path)78 inline std::string path_to_native(const std::string& path)
79 {
80 std::string s(path);
81 std::string::size_type pos = 0;
82
83 char from = '\\';
84 if (DIR_SEPARATOR == '\\')
85 from = '/';
86
87 while ((pos = s.find(from, pos)) != std::string::npos) {
88 s[pos] = DIR_SEPARATOR;
89 ++pos;
90 }
91
92 return s;
93 }
94
95
96
path_trim_trailing_separators(const std::string & path)97 inline std::string path_trim_trailing_separators(const std::string& path)
98 {
99 std::string::size_type apos = path_is_absolute(path); // first position of non-abs portion
100 if (apos >= path.size()) // / or similar
101 return path;
102
103 std::string::size_type pos = path.find_last_not_of(DIR_SEPARATOR); // remove trailing slashes
104 if (pos == std::string::npos) // / ?
105 return path.substr(0, apos);
106
107 return path.substr(0, pos + 1);
108 }
109
110
111
// Check whether the native path \c path is absolute.
// Returns 0 for relative paths; otherwise the position just past the root
// component: 1 for "/..." on non-win32 systems, 3 for "D:\..." and the
// position past the separator which terminates the host name for
// "\\host\..." on win32.
inline std::string::size_type path_is_absolute(const std::string& path)
{
#ifndef _WIN32
	if (!path.empty() && path[0] == '/')
		return 1;

#else  // win32
	if (path.size() >= 3 && path.compare(1, 2, ":\\") == 0)  // 'D:\'
		return 3;

	if (path.size() >= 4 && path.compare(0, 2, "\\\\") == 0) {  // '\\host\'
		// Look for the first separator after the leading "\\", so that the
		// root does not depend on how many components follow the host name.
		// (Searching from the end would treat "\\host\a\" as the root of
		// "\\host\a\b".)
		const std::string::size_type pos = path.find('\\', 2);
		// pos >= 3 rejects an empty host name ("\\\...").
		if (pos >= 3 && pos != std::string::npos)
			return pos + 1;
	}
#endif

	return 0;
}
131
132
133
// Return \c path truncated by one level, e.g. "/usr/local/" -> "/usr".
// On non-win32 systems this is the result of the system dirname().
// On win32 the result is computed by hand: "." for an empty or
// one-component relative path, the root for the root itself and for its
// direct children.
inline std::string path_get_dirname(const std::string& path)
{
	// GLib (as of 2.14.1) has a bug in g_path_get_dirname() implementation:
	// "/usr/local/" returns "/usr/local", not "/usr". Don't use it.

#if !defined _WIN32
	// dirname() may modify its argument, so hand it a private copy.
	// The copy includes the terminating NUL and is owned by a std::string,
	// so it is released even if constructing the result throws.
	std::string buf(path.c_str(), path.size() + 1);
	std::string ret(dirname(&buf[0]));  // copy the result while buf is still alive
	return ret;

#else
	if (path.empty())
		return ".";

	const std::string::size_type apos = path_is_absolute(path);  // first position of non-abs portion
	if (apos >= path.size())  // / or similar
		return path;

	const std::string::size_type pos2 = path.find_last_not_of(DIR_SEPARATOR);  // skip trailing separators
	// Only separators in the whole path, or only separators after the root
	// (the last non-separator character belongs to the root itself).
	if (pos2 == std::string::npos || pos2 < apos)
		return path.substr(0, apos);

	std::string::size_type pos1 = path.find_last_of(DIR_SEPARATOR, pos2);  // next separator from the end
	if (pos1 == std::string::npos) {
		return ".";  // one-component relative dir
	}
	if (apos && pos1 == apos - 1) {  // it's a root subdir
		return path.substr(0, apos);
	}

	pos1 = path.find_last_not_of(DIR_SEPARATOR, pos1);  // skip duplicate separators
	if (pos1 == std::string::npos && apos)  // it's root subdir
		return path.substr(0, apos);

	// If pos1 is npos here, pos1 + 1 wraps around to 0 and dir is empty.
	const std::string dir = path.substr(0, pos1 + 1);
	if (dir.empty())
		return ".";

	return dir;
#endif
}
178
179
180
// Return the last component of \c path, e.g. "/usr/local/" -> "local",
// "/a/b" -> "b". On non-win32 systems this is the result of the system
// basename(). On win32 the result is computed by hand: "." for an empty
// path, the path itself if it consists of the root only.
inline std::string path_get_basename(const std::string& path)
{
#if !defined _WIN32
	// basename() may modify its argument, so hand it a private copy.
	// The copy includes the terminating NUL and is owned by a std::string,
	// so it is released even if constructing the result throws.
	std::string buf(path.c_str(), path.size() + 1);
	std::string ret(basename(&buf[0]));  // copy the result while buf is still alive
	return ret;

#else
	if (path.empty())
		return ".";

	const std::string::size_type apos = path_is_absolute(path);  // first position of non-abs portion
	if (apos >= path.size())  // / or similar
		return path;  // / -> /, as per basename manpage

	std::string::size_type pos2 = path.find_last_not_of(DIR_SEPARATOR);  // skip trailing separators
	// Only separators follow the root (the last non-separator character
	// belongs to the root itself): treat the path as the root.
	if (pos2 != std::string::npos && pos2 < apos)
		return path.substr(0, apos);

	std::string::size_type pos1 = path.find_last_of(DIR_SEPARATOR, pos2);
	pos1 = (pos1 == std::string::npos ? 0 : (pos1 + 1));  // start of the last component
	pos2 = (pos2 == std::string::npos ? path.size() : (pos2 + 1));  // one past its end

	return path.substr(pos1, pos2 - pos1);
#endif
}
207
208
209
// Return the root component of \c path.
// On non-win32 systems this is always "/", regardless of \c path.
// On win32 it is "D:\" for drive paths, the "\\host\" prefix for paths
// starting with two backslashes, and an empty string if no root could be
// detected (e.g. for relative paths).
inline std::string path_get_root(const std::string& path)
{
#if !defined _WIN32
	return "/";  // easy

#else  // hard
	// don't use path_is_absolute(), we have slightly more error-checking.
	if (path.size() >= 3 && path.compare(1, 2, ":\\") == 0)  // 'D:\'
		return path.substr(0, 3);

	if (path.size() >= 4 && path.compare(0, 2, "\\\\") == 0) {  // '\\host\', '\\.\', '\\?\'
		// Use the first separator after the leading "\\", so that the root
		// does not depend on how many components follow the host name.
		const std::string::size_type pos = path.find('\\', 2);
		// pos >= 3 rejects an empty host name ("\\\...").
		if (pos >= 3 && pos != std::string::npos)
			return path.substr(0, pos + 1);
	}
	return std::string();  // cannot detect
#endif
}
228
229
230
path_compress(const std::string & path)231 inline std::string path_compress(const std::string& path)
232 {
233 std::string::size_type rel_pos = path_is_absolute(path);
234 std::string rel = path.substr(rel_pos); // retrieve relative component only
235
236 std::string::size_type curr = 0, last = 0;
237 std::string::size_type end = rel.size();
238 std::string result, component;
239
240 while (true) {
241 if (last >= end) // last is past the end
242 break;
243
244 curr = rel.find(DIR_SEPARATOR, last);
245 if (curr != last) {
246 component = rel.substr(last, (curr == std::string::npos ? curr : (curr - last)));
247
248 if (component == ".") {
249 if (result == "" && rel_pos == 0) // don't un-relativise
250 result += (std::string(".") + DIR_SEPARATOR_S);
251 // else, nothing
252
253 } else if (component == "..") {
254 // retain ".." when previous component is ".." or ".".
255 if (result == "" || result == (std::string(".") + DIR_SEPARATOR_S)
256 || (result.size() >= 3 && result.substr(result.size() - 3) == (std::string("..") + DIR_SEPARATOR_S))) {
257 result += (std::string("..") + DIR_SEPARATOR_S);
258
259 } else {
260 // std::cerr << "Getting dirname on \"" << result << "\"\n";
261 std::string up = path_get_dirname(result); // go up
262 if (up == ".") {
263 result = "";
264 } else {
265 result = (up + DIR_SEPARATOR_S);
266 }
267 }
268
269 } else {
270 result += (component + DIR_SEPARATOR_S);
271 }
272 }
273
274 if (curr == std::string::npos)
275 break;
276 last = curr + 1;
277 }
278
279 return path_trim_trailing_separators(path.substr(0, rel_pos) + result);
280 }
281
282
283
// Return a copy of \c filename which is safe to create: every character
// other than ASCII letters, digits, '.', '_' and '-' is replaced by '_',
// and a trailing '.' is replaced by '_' as well.
// An empty \c filename yields an empty string.
inline std::string filename_make_safe(const std::string& filename)
{
	const char* const allowed =
			"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890._-";

	std::string safe(filename);
	for (std::string::size_type pos = safe.find_first_not_of(allowed);
			pos != std::string::npos;
			pos = safe.find_first_not_of(allowed, pos + 1)) {
		safe[pos] = '_';
	}

	// win32 kernel (heh) has trouble with space and dot-ending files.
	// A trailing space has already been turned into '_' by the loop above
	// (space is not an allowed character), so only the dot is left to handle.
	if (!safe.empty() && safe[safe.size() - 1] == '.') {
		safe[safe.size() - 1] = '_';
	}

	return safe;
}
300
301
302
path_make_safe(const std::string & path)303 inline std::string path_make_safe(const std::string& path)
304 {
305 std::string s(path);
306 std::string::size_type pos = 0;
307 while ((pos = s.find_first_not_of(
308 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890._-",
309 pos)) != std::string::npos) {
310 if (s[pos] != DIR_SEPARATOR)
311 s[pos] = '_';
312 ++pos;
313 }
314 // win32 kernel (heh) has trouble with space and dot-ending files
315 if (!s.empty() && (s[s.size() - 1] == '.' || s[s.size() - 1] == ' ')) {
316 s[s.size() - 1] = '_';
317 }
318 return s;
319 }
320
321
322
323
324
325 } // ns hz
326
327
328
329 #endif
330
331 /// @}
332