xref: /freebsd/contrib/xz/src/xz/suffix.c (revision 81ad6265)
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       suffix.c
4 /// \brief      Checks filename suffix and creates the destination filename
5 //
6 //  Author:     Lasse Collin
7 //
8 //  This file has been put into the public domain.
9 //  You can do whatever you want with this file.
10 //
11 ///////////////////////////////////////////////////////////////////////////////
12 
13 #include "private.h"
14 
15 #ifdef __DJGPP__
16 #	include <fcntl.h>
17 #endif
18 
19 // For case-insensitive filename suffix on case-insensitive systems
20 #if defined(TUKLIB_DOSLIKE) || defined(__VMS)
21 #	ifdef HAVE_STRINGS_H
22 #		include <strings.h>
23 #	endif
24 #	define strcmp strcasecmp
25 #endif
26 
27 
/// \brief      Custom filename suffix set with suffix_set()
///
/// NULL until suffix_set() has been called. The string is a heap copy
/// made with xstrdup() and is owned by this file; suffix_set() frees the
/// previous value before storing a new one.
static char *custom_suffix = NULL;
29 
30 
/// \brief      Tell whether a character separates path components
///
/// \param      c       Character to examine
///
/// \return     true if c is a directory separator: always '/', and
///             additionally '\\' and ':' when TUKLIB_DOSLIKE is defined.
///             Otherwise false.
static bool
is_dir_sep(char c)
{
	switch (c) {
	case '/':
#ifdef TUKLIB_DOSLIKE
	case '\\':
	case ':':
#endif
		return true;

	default:
		return false;
	}
}
41 
42 
/// \brief      Tell whether a string contains any directory separator
///
/// The set of separators is the same one is_dir_sep() recognizes: '/'
/// everywhere, plus '\\' and ':' when TUKLIB_DOSLIKE is defined.
///
/// \param      str     Null-terminated string to scan
///
/// \return     true if at least one separator occurs in str, false
///             otherwise (including when str is empty).
static bool
has_dir_sep(const char *str)
{
#ifdef TUKLIB_DOSLIKE
	static const char separators[] = "/\\:";
#else
	static const char separators[] = "/";
#endif

	// strcspn() gives the length of the leading run that is free of
	// separators. If that run stops before the terminating null byte,
	// it stopped at a separator.
	return str[strcspn(str, separators)] != '\0';
}
53 
54 
#ifdef __DJGPP__
/// \brief      Detect the dash suffix used with 8.3 short filenames (SFN)
///
/// \param      str     Filename, possibly with a leading path
/// \param      len     strlen(str)
///
/// \return     true if str has the form *.?- or *.??- where the '*' part
///             ends in at least one character that is not a directory
///             separator and the '?' characters are neither dots nor
///             directory separators. Otherwise false.
static bool
has_sfn_suffix(const char *str, size_t len)
{
	// The shortest possible match is four characters long ("a.b-"),
	// and every match ends with a dash.
	if (len < 4 || str[len - 1] != '-')
		return false;

	// The character right before the dash has to be part of
	// the filename extension.
	const char before_dash = str[len - 2];
	if (before_dash == '.' || is_dir_sep(before_dash))
		return false;

	// *.?-
	if (str[len - 3] == '.')
		return !is_dir_sep(str[len - 4]);

	// *.??-
	if (len < 5 || is_dir_sep(str[len - 3]) || str[len - 4] != '.')
		return false;

	return !is_dir_sep(str[len - 5]);
}
#endif
78 
79 
/// \brief      Check whether src_name ends with the given suffix
///
/// The comparison is done with strcmp(), which this file redefines to
/// strcasecmp() when TUKLIB_DOSLIKE or __VMS is defined.
///
/// \param      suffix      Filename suffix to look for
/// \param      src_name    Input filename, possibly with a leading path
/// \param      src_len     strlen(src_name)
///
/// \return     Length of src_name without the suffix, that is,
///             src_len - strlen(suffix), when the suffix matches. This is
///             never zero for a match. Zero is returned when there is no
///             match or when nothing but the suffix would remain of
///             the filename.
static size_t
test_suffix(const char *suffix, const char *src_name, size_t src_len)
{
	const size_t suffix_len = strlen(suffix);

	// Something has to be left of the name once the suffix is removed.
	if (src_len <= suffix_len)
		return 0;

	const size_t stem_len = src_len - suffix_len;

	// src_name may include a path. If the character in front of the
	// would-be suffix is a directory separator, then the filename
	// itself would consist of the suffix only.
	if (is_dir_sep(src_name[stem_len - 1]))
		return 0;

	return strcmp(suffix, src_name + stem_len) == 0 ? stem_len : 0;
}
106 
107 
108 /// \brief      Removes the filename suffix of the compressed file
109 ///
110 /// \return     Name of the uncompressed file, or NULL if file has unknown
111 ///             suffix.
112 static char *
113 uncompressed_name(const char *src_name, const size_t src_len)
114 {
115 	static const struct {
116 		const char *compressed;
117 		const char *uncompressed;
118 	} suffixes[] = {
119 		{ ".xz",    "" },
120 		{ ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
121 		{ ".lzma",  "" },
122 #ifdef __DJGPP__
123 		{ ".lzm",   "" },
124 #endif
125 		{ ".tlz",   ".tar" }, // Both .tar.lzma and .tar.lz
126 #ifdef HAVE_LZIP_DECODER
127 		{ ".lz",    "" },
128 #endif
129 	};
130 
131 	const char *new_suffix = "";
132 	size_t new_len = 0;
133 
134 	if (opt_format == FORMAT_RAW) {
135 		// Don't check for known suffixes when --format=raw was used.
136 		if (custom_suffix == NULL) {
137 			message_error(_("%s: With --format=raw, "
138 					"--suffix=.SUF is required unless "
139 					"writing to stdout"), src_name);
140 			return NULL;
141 		}
142 	} else {
143 		for (size_t i = 0; i < ARRAY_SIZE(suffixes); ++i) {
144 			new_len = test_suffix(suffixes[i].compressed,
145 					src_name, src_len);
146 			if (new_len != 0) {
147 				new_suffix = suffixes[i].uncompressed;
148 				break;
149 			}
150 		}
151 
152 #ifdef __DJGPP__
153 		// Support also *.?- -> *.? and *.??- -> *.?? on DOS.
154 		// This is done also when long filenames are available
155 		// to keep it easy to decompress files created when
156 		// long filename support wasn't available.
157 		if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
158 			new_suffix = "";
159 			new_len = src_len - 1;
160 		}
161 #endif
162 	}
163 
164 	if (new_len == 0 && custom_suffix != NULL)
165 		new_len = test_suffix(custom_suffix, src_name, src_len);
166 
167 	if (new_len == 0) {
168 		message_warning(_("%s: Filename has an unknown suffix, "
169 				"skipping"), src_name);
170 		return NULL;
171 	}
172 
173 	const size_t new_suffix_len = strlen(new_suffix);
174 	char *dest_name = xmalloc(new_len + new_suffix_len + 1);
175 
176 	memcpy(dest_name, src_name, new_len);
177 	memcpy(dest_name + new_len, new_suffix, new_suffix_len);
178 	dest_name[new_len + new_suffix_len] = '\0';
179 
180 	return dest_name;
181 }
182 
183 
/// \brief      Warn that a file is skipped because it already has a suffix
///
/// compressed_name() needs the same warning at several points, so the
/// message lives in a helper function of its own.
///
/// \param      src_name    Input filename shown in the message
/// \param      suffix      The suffix that src_name was found to have
static void
msg_suffix(const char *src_name, const char *suffix)
{
	message_warning(
			_("%s: File already has `%s' suffix, skipping"),
			src_name, suffix);
}
193 
194 
195 /// \brief      Appends suffix to src_name
196 ///
197 /// In contrast to uncompressed_name(), we check only suffixes that are valid
198 /// for the specified file format.
199 static char *
200 compressed_name(const char *src_name, size_t src_len)
201 {
202 	// The order of these must match the order in args.h.
203 	static const char *const all_suffixes[][4] = {
204 		{
205 			".xz",
206 			".txz",
207 			NULL
208 		}, {
209 			".lzma",
210 #ifdef __DJGPP__
211 			".lzm",
212 #endif
213 			".tlz",
214 			NULL
215 #ifdef HAVE_LZIP_DECODER
216 		// This is needed to keep the table indexing in sync with
217 		// enum format_type from coder.h.
218 		}, {
219 /*
220 			".lz",
221 */
222 			NULL
223 #endif
224 		}, {
225 			// --format=raw requires specifying the suffix
226 			// manually or using stdout.
227 			NULL
228 		}
229 	};
230 
231 	// args.c ensures these.
232 	assert(opt_format != FORMAT_AUTO);
233 #ifdef HAVE_LZIP_DECODER
234 	assert(opt_format != FORMAT_LZIP);
235 #endif
236 
237 	const size_t format = opt_format - 1;
238 	const char *const *suffixes = all_suffixes[format];
239 
240 	// Look for known filename suffixes and refuse to compress them.
241 	for (size_t i = 0; suffixes[i] != NULL; ++i) {
242 		if (test_suffix(suffixes[i], src_name, src_len) != 0) {
243 			msg_suffix(src_name, suffixes[i]);
244 			return NULL;
245 		}
246 	}
247 
248 #ifdef __DJGPP__
249 	// Recognize also the special suffix that is used when long
250 	// filename (LFN) support isn't available. This suffix is
251 	// recognized on LFN systems too.
252 	if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
253 		msg_suffix(src_name, "-");
254 		return NULL;
255 	}
256 #endif
257 
258 	if (custom_suffix != NULL) {
259 		if (test_suffix(custom_suffix, src_name, src_len) != 0) {
260 			msg_suffix(src_name, custom_suffix);
261 			return NULL;
262 		}
263 	}
264 
265 	// TODO: Hmm, maybe it would be better to validate this in args.c,
266 	// since the suffix handling when decoding is weird now.
267 	if (opt_format == FORMAT_RAW && custom_suffix == NULL) {
268 		message_error(_("%s: With --format=raw, "
269 				"--suffix=.SUF is required unless "
270 				"writing to stdout"), src_name);
271 		return NULL;
272 	}
273 
274 	const char *suffix = custom_suffix != NULL
275 			? custom_suffix : suffixes[0];
276 	size_t suffix_len = strlen(suffix);
277 
278 #ifdef __DJGPP__
279 	if (!_use_lfn(src_name)) {
280 		// Long filename (LFN) support isn't available and we are
281 		// limited to 8.3 short filenames (SFN).
282 		//
283 		// Look for suffix separator from the filename, and make sure
284 		// that it is in the filename, not in a directory name.
285 		const char *sufsep = strrchr(src_name, '.');
286 		if (sufsep == NULL || sufsep[1] == '\0'
287 				|| has_dir_sep(sufsep)) {
288 			// src_name has no filename extension.
289 			//
290 			// Examples:
291 			// xz foo         -> foo.xz
292 			// xz -F lzma foo -> foo.lzm
293 			// xz -S x foo    -> foox
294 			// xz -S x foo.   -> foo.x
295 			// xz -S x.y foo  -> foox.y
296 			// xz -S .x foo   -> foo.x
297 			// xz -S .x foo.  -> foo.x
298 			//
299 			// Avoid double dots:
300 			if (sufsep != NULL && sufsep[1] == '\0'
301 					&& suffix[0] == '.')
302 				--src_len;
303 
304 		} else if (custom_suffix == NULL
305 				&& strcasecmp(sufsep, ".tar") == 0) {
306 			// ".tar" is handled specially.
307 			//
308 			// Examples:
309 			// xz foo.tar          -> foo.txz
310 			// xz -F lzma foo.tar  -> foo.tlz
311 			static const char *const tar_suffixes[] = {
312 				".txz", // .tar.xz
313 				".tlz", // .tar.lzma
314 /*
315 				".tlz", // .tar.lz
316 */
317 			};
318 			suffix = tar_suffixes[format];
319 			suffix_len = 4;
320 			src_len -= 4;
321 
322 		} else {
323 			if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
324 				// Instead of the .xz suffix, use a single
325 				// character at the end of the filename
326 				// extension. This is to minimize name
327 				// conflicts when compressing multiple files
328 				// with the same basename. E.g. foo.txt and
329 				// foo.exe become foo.tx- and foo.ex-. Dash
330 				// is rare as the last character of the
331 				// filename extension, so it seems to be
332 				// quite safe choice and it stands out better
333 				// in directory listings than e.g. x. For
334 				// comparison, gzip uses z.
335 				suffix = "-";
336 				suffix_len = 1;
337 			}
338 
339 			if (suffix[0] == '.') {
340 				// The first character of the suffix is a dot.
341 				// Throw away the original filename extension
342 				// and replace it with the new suffix.
343 				//
344 				// Examples:
345 				// xz -F lzma foo.txt  -> foo.lzm
346 				// xz -S .x  foo.txt   -> foo.x
347 				src_len = sufsep - src_name;
348 
349 			} else {
350 				// The first character of the suffix is not
351 				// a dot. Preserve the first 0-2 characters
352 				// of the original filename extension.
353 				//
354 				// Examples:
355 				// xz foo.txt         -> foo.tx-
356 				// xz -S x  foo.c     -> foo.cx
357 				// xz -S ab foo.c     -> foo.cab
358 				// xz -S ab foo.txt   -> foo.tab
359 				// xz -S abc foo.txt  -> foo.abc
360 				//
361 				// Truncate the suffix to three chars:
362 				if (suffix_len > 3)
363 					suffix_len = 3;
364 
365 				// If needed, overwrite 1-3 characters.
366 				if (strlen(sufsep) > 4 - suffix_len)
367 					src_len = sufsep - src_name
368 							+ 4 - suffix_len;
369 			}
370 		}
371 	}
372 #endif
373 
374 	char *dest_name = xmalloc(src_len + suffix_len + 1);
375 
376 	memcpy(dest_name, src_name, src_len);
377 	memcpy(dest_name + src_len, suffix, suffix_len);
378 	dest_name[src_len + suffix_len] = '\0';
379 
380 	return dest_name;
381 }
382 
383 
384 extern char *
385 suffix_get_dest_name(const char *src_name)
386 {
387 	assert(src_name != NULL);
388 
389 	// Length of the name is needed in all cases to locate the end of
390 	// the string to compare the suffix, so calculate the length here.
391 	const size_t src_len = strlen(src_name);
392 
393 	return opt_mode == MODE_COMPRESS
394 			? compressed_name(src_name, src_len)
395 			: uncompressed_name(src_name, src_len);
396 }
397 
398 
399 extern void
400 suffix_set(const char *suffix)
401 {
402 	// Empty suffix and suffixes having a directory separator are
403 	// rejected. Such suffixes would break things later.
404 	if (suffix[0] == '\0' || has_dir_sep(suffix))
405 		message_fatal(_("%s: Invalid filename suffix"), suffix);
406 
407 	// Replace the old custom_suffix (if any) with the new suffix.
408 	free(custom_suffix);
409 	custom_suffix = xstrdup(suffix);
410 	return;
411 }
412