xref: /dragonfly/contrib/xz/src/xz/suffix.c (revision a1282e19)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       suffix.c
4 /// \brief      Checks filename suffix and creates the destination filename
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 #ifdef __DJGPP__
16 #	include <fcntl.h>
17 #endif
18 
// For case-insensitive filename suffix on case-insensitive systems.
// On these systems every strcmp() call later in this file, including the
// suffix comparison in test_suffix(), is turned into strcasecmp().
#if defined(TUKLIB_DOSLIKE) || defined(__VMS)
#	define strcmp strcasecmp
#endif
23 
24 
/// Suffix given with suffix_set(), or NULL if none has been set. The string
/// is a copy made with xstrdup(); suffix_set() frees the previous copy
/// before storing a new one.
static char *custom_suffix = NULL;
26 
27 
/// \brief      Tell whether a character separates path components
///
/// The forward slash always counts. When TUKLIB_DOSLIKE is defined,
/// backslash and colon count as well.
///
/// \param      c       Character to classify
///
/// \return     true if c is a directory separator, false otherwise.
static bool
is_dir_sep(char c)
{
	switch (c) {
	case '/':
#ifdef TUKLIB_DOSLIKE
	case '\\':
	case ':':
#endif
		return true;

	default:
		return false;
	}
}
38 
39 
/// \brief      Tell whether a string contains any directory separator
///
/// The set of separators is the same one that is_dir_sep() recognizes:
/// '/' everywhere, plus '\\' and ':' when TUKLIB_DOSLIKE is defined.
///
/// \param      str     Null-terminated string to scan
///
/// \return     true if at least one separator occurs in str.
static bool
has_dir_sep(const char *str)
{
	static const char separators[] =
#ifdef TUKLIB_DOSLIKE
			"/\\:";
#else
			"/";
#endif

	// strcspn() gives the length of the leading run that is free of
	// separators. If that run stops before the terminator, a separator
	// was found.
	return str[strcspn(str, separators)] != '\0';
}
50 
51 
#ifdef __DJGPP__
/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
///
/// \param      str     Filename, possibly with a leading path
/// \param      len     strlen(str)
///
/// \return     true if str matches *.?- or *.??-, where neither the
///             extension characters nor the character right before the
///             dot is a directory separator. Otherwise false.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// Shortest possible match is "a.b-", and the name must end in a dash.
	if (len < 4 || str[len - 1] != '-')
		return false;

	// The character before the dash must be a real extension character.
	const char before_dash = str[len - 2];
	if (before_dash == '.' || is_dir_sep(before_dash))
		return false;

	// *.?-
	if (str[len - 3] == '.')
		return !is_dir_sep(str[len - 4]);

	// *.??-
	if (len < 5 || is_dir_sep(str[len - 3]) || str[len - 4] != '.')
		return false;

	return !is_dir_sep(str[len - 5]);
}
#endif
75 
76 
/// \brief      Checks whether src_name ends with the given suffix
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename
/// \param      src_len     strlen(src_name)
///
/// \return     Length of src_name without the suffix (always positive)
///             if the suffix matches. Zero if it doesn't match or if
///             nothing but the suffix would be left of the filename.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// At least one character has to remain once the suffix is removed.
	if (src_len <= suffix_len)
		return 0;

	// src_name may include a path, so the remaining part must not end
	// in a directory separator: "dir/.xz" has no filename before ".xz".
	const size_t stem_len = src_len - suffix_len;
	if (is_dir_sep(src_name[stem_len - 1]))
		return 0;

	return strcmp(suffix, src_name + stem_len) == 0 ? stem_len : 0;
}
103 
104 
105 /// \brief      Removes the filename suffix of the compressed file
106 ///
107 /// \return     Name of the uncompressed file, or NULL if file has unknown
108 ///             suffix.
109 static char *
110 uncompressed_name(const char *src_name, const size_t src_len)
111 {
112 	static const struct {
113 		const char *compressed;
114 		const char *uncompressed;
115 	} suffixes[] = {
116 		{ ".xz",    "" },
117 		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
118 		{ ".lzma",  "" },
119 #ifdef __DJGPP__
120 		{ ".lzm",   "" },
121 #endif
122 		{ ".tlz",   ".tar" },
123 		// { ".gz",    "" },
124 		// { ".tgz",   ".tar" },
125 	};
126 
127 	const char *new_suffix = "";
128 	size_t new_len = 0;
129 
130 	if (opt_format == FORMAT_RAW) {
131 		// Don't check for known suffixes when --format=raw was used.
132 		if (custom_suffix == NULL) {
133 			message_error(_("%s: With --format=raw, "
134 					"--suffix=.SUF is required unless "
135 					"writing to stdout"), src_name);
136 			return NULL;
137 		}
138 	} else {
139 		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
140 			new_len = test_suffix(suffixes[i].compressed,
141 					src_name, src_len);
142 			if (new_len != 0) {
143 				new_suffix = suffixes[i].uncompressed;
144 				break;
145 			}
146 		}
147 
148 #ifdef __DJGPP__
149 		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
150 		// This is done also when long filenames are available
151 		// to keep it easy to decompress files created when
152 		// long filename support wasn't available.
153 		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
154 			new_suffix = "";
155 			new_len = src_len - 1;
156 		}
157 #endif
158 	}
159 
160 	if (new_len == 0 && custom_suffix != NULL)
161 		new_len = test_suffix(custom_suffix, src_name, src_len);
162 
163 	if (new_len == 0) {
164 		message_warning(_("%s: Filename has an unknown suffix, "
165 				"skipping"), src_name);
166 		return NULL;
167 	}
168 
169 	const size_t new_suffix_len = strlen(new_suffix);
170 	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
171 
172 	memcpy(dest_name, src_name, new_len);
173 	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
174 	dest_name[new_len + new_suffix_len] = '\0';
175 
176 	return dest_name;
177 }
178 
179 
/// \brief      Warns that a file already has a compressed-file suffix
///
/// compressed_name() needs this warning in several places, so it is
/// kept in a function of its own.
///
/// \param      src_name    Input filename
/// \param      suffix      The suffix that was found in src_name
static void
msg_suffix(const char *src_name, const char *suffix)
{
	message_warning(_("%s: File already has `%s' suffix, skipping"),
			src_name, suffix);
}
189 
190 
191 /// \brief      Appends suffix to src_name
192 ///
193 /// In contrast to uncompressed_name(), we check only suffixes that are valid
194 /// for the specified file format.
195 static char *
196 compressed_name(const char *src_name, size_t src_len)
197 {
198 	// The order of these must match the order in args.h.
199 	static const char *const all_suffixes[][4] = {
200 		{
201 			".xz",
202 			".txz",
203 			NULL
204 		}, {
205 			".lzma",
206 #ifdef __DJGPP__
207 			".lzm",
208 #endif
209 			".tlz",
210 			NULL
211 /*
212 		}, {
213 			".gz",
214 			".tgz",
215 			NULL
216 */
217 		}, {
218 			// --format=raw requires specifying the suffix
219 			// manually or using stdout.
220 			NULL
221 		}
222 	};
223 
224 	// args.c ensures this.
225 	assert(opt_format != FORMAT_AUTO);
226 
227 	const size_t format = opt_format - 1;
228 	const char *const *suffixes = all_suffixes[format];
229 
230 	// Look for known filename suffixes and refuse to compress them.
231 	for (size_t i = 0; suffixes[i] != NULL; ++i) {
232 		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
233 			msg_suffix(src_name, suffixes[i]);
234 			return NULL;
235 		}
236 	}
237 
238 #ifdef __DJGPP__
239 	// Recognize also the special suffix that is used when long
240 	// filename (LFN) support isn't available. This suffix is
241 	// recognized on LFN systems too.
242 	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
243 		msg_suffix(src_name, "-");
244 		return NULL;
245 	}
246 #endif
247 
248 	if (custom_suffix != NULL) {
249 		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
250 			msg_suffix(src_name, custom_suffix);
251 			return NULL;
252 		}
253 	}
254 
255 	// TODO: Hmm, maybe it would be better to validate this in args.c,
256 	// since the suffix handling when decoding is weird now.
257 	if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
258 		message_error(_("%s: With --format=raw, "
259 				"--suffix=.SUF is required unless "
260 				"writing to stdout"), src_name);
261 		return NULL;
262 	}
263 
264 	const char *suffix = custom_suffix != NULL
265 			? custom_suffix : suffixes[0];
266 	size_t suffix_len = strlen(suffix);
267 
268 #ifdef __DJGPP__
269 	if (!_use_lfn(src_name)) {
270 		// Long filename (LFN) support isn't available and we are
271 		// limited to 8.3 short filenames (SFN).
272 		//
273 		// Look for suffix separator from the filename, and make sure
274 		// that it is in the filename, not in a directory name.
275 		const char *sufsep = strrchr(src_name, '.');
276 		if (sufsep == NULL || sufsep[1] == '\0'
277 				|| has_dir_sep(sufsep)) {
278 			// src_name has no filename extension.
279 			//
280 			// Examples:
281 			// xz foo         -> foo.xz
282 			// xz -F lzma foo -> foo.lzm
283 			// xz -S x foo    -> foox
284 			// xz -S x foo.   -> foo.x
285 			// xz -S x.y foo  -> foox.y
286 			// xz -S .x foo   -> foo.x
287 			// xz -S .x foo.  -> foo.x
288 			//
289 			// Avoid double dots:
290 			if (sufsep != NULL && sufsep[1] == '\0'
291 					&& suffix[0] == '.')
292 				--src_len;
293 
294 		} else if (custom_suffix == NULL
295 				&& strcasecmp(sufsep, ".tar") == 0) {
296 			// ".tar" is handled specially.
297 			//
298 			// Examples:
299 			// xz foo.tar          -> foo.txz
300 			// xz -F lzma foo.tar  -> foo.tlz
301 			static const char *const tar_suffixes[] = {
302 				".txz",
303 				".tlz",
304 				// ".tgz",
305 			};
306 			suffix = tar_suffixes[format];
307 			suffix_len = 4;
308 			src_len -= 4;
309 
310 		} else {
311 			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
312 				// Instead of the .xz suffix, use a single
313 				// character at the end of the filename
314 				// extension. This is to minimize name
315 				// conflicts when compressing multiple files
316 				// with the same basename. E.g. foo.txt and
317 				// foo.exe become foo.tx- and foo.ex-. Dash
318 				// is rare as the last character of the
319 				// filename extension, so it seems to be
320 				// quite safe choice and it stands out better
321 				// in directory listings than e.g. x. For
322 				// comparison, gzip uses z.
323 				suffix = "-";
324 				suffix_len = 1;
325 			}
326 
327 			if (suffix[0] == '.') {
328 				// The first character of the suffix is a dot.
329 				// Throw away the original filename extension
330 				// and replace it with the new suffix.
331 				//
332 				// Examples:
333 				// xz -F lzma foo.txt  -> foo.lzm
334 				// xz -S .x  foo.txt   -> foo.x
335 				src_len = sufsep - src_name;
336 
337 			} else {
338 				// The first character of the suffix is not
339 				// a dot. Preserve the first 0-2 characters
340 				// of the original filename extension.
341 				//
342 				// Examples:
343 				// xz foo.txt         -> foo.tx-
344 				// xz -S x  foo.c     -> foo.cx
345 				// xz -S ab foo.c     -> foo.cab
346 				// xz -S ab foo.txt   -> foo.tab
347 				// xz -S abc foo.txt  -> foo.abc
348 				//
349 				// Truncate the suffix to three chars:
350 				if (suffix_len > 3)
351 					suffix_len = 3;
352 
353 				// If needed, overwrite 1-3 characters.
354 				if (strlen(sufsep) > 4 - suffix_len)
355 					src_len = sufsep - src_name
356 							+ 4 - suffix_len;
357 			}
358 		}
359 	}
360 #endif
361 
362 	char *dest_name = xmalloc(src_len + suffix_len + 1);
363 
364 	memcpy(dest_name, src_name, src_len);
365 	memcpy(dest_name + src_len, suffix, suffix_len);
366 	dest_name[src_len + suffix_len] = '\0';
367 
368 	return dest_name;
369 }
370 
371 
372 extern char *
373 suffix_get_dest_name(const char *src_name)
374 {
375 	assert(src_name != NULL);
376 
377 	// Length of the name is needed in all cases to locate the end of
378 	// the string to compare the suffix, so calculate the length here.
379 	const size_t src_len = strlen(src_name);
380 
381 	return opt_mode == MODE_COMPRESS
382 			? compressed_name(src_name, src_len)
383 			: uncompressed_name(src_name, src_len);
384 }
385 
386 
387 extern void
388 suffix_set(const char *suffix)
389 {
390 	// Empty suffix and suffixes having a directory separator are
391 	// rejected. Such suffixes would break things later.
392 	if (suffix[0] == '\0' || has_dir_sep(suffix))
393 		message_fatal(_("%s: Invalid filename suffix"), suffix);
394 
395 	// Replace the old custom_suffix (if any) with the new suffix.
396 	free(custom_suffix);
397 	custom_suffix = xstrdup(suffix);
398 	return;
399 }
400