1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27 
28 #define __LIBARCHIVE_TEST
29 #include "archive_pathmatch.h"
30 
31 /*
32  * Verify that the pattern matcher implements the wildcard logic specified
33  * in SUSv2 for the cpio command.  This is essentially the
34  * shell glob syntax:
35  *   * - matches any sequence of chars, including '/'
36  *   ? - matches any single char, including '/'
 *   [...] - matches any of a set of chars, '-' specifies a range,
 *        initial '!' is undefined by SUSv2 (the tests below expect
 *        it to negate the set)
 *
 * The specification in SUSv2 is a bit incomplete; I assume the following:
 *   Trailing '-' in [...] is not special.
42  *
43  * TODO: Figure out if there's a good way to extend this to handle
44  * Windows paths that use '\' as a path separator.  <sigh>
45  */
46 
47 DEFINE_TEST(test_archive_pathmatch)
48 {
49 	assertEqualInt(1, archive_pathmatch("a/b/c", "a/b/c", 0));
50 	assertEqualInt(0, archive_pathmatch("a/b/", "a/b/c", 0));
51 	assertEqualInt(0, archive_pathmatch("a/b", "a/b/c", 0));
52 	assertEqualInt(0, archive_pathmatch("a/b/c", "a/b/", 0));
53 	assertEqualInt(0, archive_pathmatch("a/b/c", "a/b", 0));
54 
55     /* Null string and non-empty pattern returns false. */
56 	assertEqualInt(0, archive_pathmatch("a/b/c", NULL, 0));
57 	assertEqualInt(0, archive_pathmatch_w(L"a/b/c", NULL, 0));
58 
59 	/* Empty pattern only matches empty string. */
60 	assertEqualInt(1, archive_pathmatch("","", 0));
61 	assertEqualInt(0, archive_pathmatch("","a", 0));
62 	assertEqualInt(1, archive_pathmatch("*","", 0));
63 	assertEqualInt(1, archive_pathmatch("*","a", 0));
64 	assertEqualInt(1, archive_pathmatch("*","abcd", 0));
65 	/* SUSv2: * matches / */
66 	assertEqualInt(1, archive_pathmatch("*","abcd/efgh/ijkl", 0));
67 	assertEqualInt(1, archive_pathmatch("abcd*efgh/ijkl","abcd/efgh/ijkl", 0));
68 	assertEqualInt(1, archive_pathmatch("abcd***efgh/ijkl","abcd/efgh/ijkl", 0));
69 	assertEqualInt(1, archive_pathmatch("abcd***/efgh/ijkl","abcd/efgh/ijkl", 0));
70 	assertEqualInt(0, archive_pathmatch("?", "", 0));
71 	assertEqualInt(0, archive_pathmatch("?", "\0", 0));
72 	assertEqualInt(1, archive_pathmatch("?", "a", 0));
73 	assertEqualInt(0, archive_pathmatch("?", "ab", 0));
74 	assertEqualInt(1, archive_pathmatch("?", ".", 0));
75 	assertEqualInt(1, archive_pathmatch("?", "?", 0));
76 	assertEqualInt(1, archive_pathmatch("a", "a", 0));
77 	assertEqualInt(0, archive_pathmatch("a", "ab", 0));
78 	assertEqualInt(0, archive_pathmatch("a", "ab", 0));
79 	assertEqualInt(1, archive_pathmatch("a?c", "abc", 0));
80 	/* SUSv2: ? matches / */
81 	assertEqualInt(1, archive_pathmatch("a?c", "a/c", 0));
82 	assertEqualInt(1, archive_pathmatch("a?*c*", "a/c", 0));
83 	assertEqualInt(1, archive_pathmatch("*a*", "a/c", 0));
84 	assertEqualInt(1, archive_pathmatch("*a*", "/a/c", 0));
85 	assertEqualInt(1, archive_pathmatch("*a*", "defaaaaaaa", 0));
86 	assertEqualInt(0, archive_pathmatch("a*", "defghi", 0));
87 	assertEqualInt(0, archive_pathmatch("*a*", "defghi", 0));
88 
89 	/* Character classes */
90 	assertEqualInt(1, archive_pathmatch("abc[def", "abc[def", 0));
91 	assertEqualInt(0, archive_pathmatch("abc[def]", "abc[def", 0));
92 	assertEqualInt(0, archive_pathmatch("abc[def", "abcd", 0));
93 	assertEqualInt(1, archive_pathmatch("abc[def]", "abcd", 0));
94 	assertEqualInt(1, archive_pathmatch("abc[def]", "abce", 0));
95 	assertEqualInt(1, archive_pathmatch("abc[def]", "abcf", 0));
96 	assertEqualInt(0, archive_pathmatch("abc[def]", "abcg", 0));
97 	assertEqualInt(1, archive_pathmatch("abc[d*f]", "abcd", 0));
98 	assertEqualInt(1, archive_pathmatch("abc[d*f]", "abc*", 0));
99 	assertEqualInt(0, archive_pathmatch("abc[d*f]", "abcdefghi", 0));
100 	assertEqualInt(0, archive_pathmatch("abc[d*", "abcdefghi", 0));
101 	assertEqualInt(1, archive_pathmatch("abc[d*", "abc[defghi", 0));
102 	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abcd", 0));
103 	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abce", 0));
104 	assertEqualInt(1, archive_pathmatch("abc[d-f]", "abcf", 0));
105 	assertEqualInt(0, archive_pathmatch("abc[d-f]", "abcg", 0));
106 	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abca", 0));
107 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcd", 0));
108 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abce", 0));
109 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcf", 0));
110 	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abcg", 0));
111 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abch", 0));
112 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abci", 0));
113 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abcj", 0));
114 	assertEqualInt(1, archive_pathmatch("abc[d-fh-k]", "abck", 0));
115 	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abcl", 0));
116 	assertEqualInt(0, archive_pathmatch("abc[d-fh-k]", "abc-", 0));
117 
118 	/* [] matches nothing, [!] is the same as ? */
119 	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcdefg", 0));
120 	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcqefg", 0));
121 	assertEqualInt(0, archive_pathmatch("abc[]efg", "abcefg", 0));
122 	assertEqualInt(1, archive_pathmatch("abc[!]efg", "abcdefg", 0));
123 	assertEqualInt(1, archive_pathmatch("abc[!]efg", "abcqefg", 0));
124 	assertEqualInt(0, archive_pathmatch("abc[!]efg", "abcefg", 0));
125 
126 	/* I assume: Trailing '-' is non-special. */
127 	assertEqualInt(0, archive_pathmatch("abc[d-fh-]", "abcl", 0));
128 	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abch", 0));
129 	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abc-", 0));
130 	assertEqualInt(1, archive_pathmatch("abc[d-fh-]", "abc-", 0));
131 
132 	/* ']' can be backslash-quoted within a character class. */
133 	assertEqualInt(1, archive_pathmatch("abc[\\]]", "abc]", 0));
134 	assertEqualInt(1, archive_pathmatch("abc[\\]d]", "abc]", 0));
135 	assertEqualInt(1, archive_pathmatch("abc[\\]d]", "abcd", 0));
136 	assertEqualInt(1, archive_pathmatch("abc[d\\]]", "abc]", 0));
137 	assertEqualInt(1, archive_pathmatch("abc[d\\]]", "abcd", 0));
138 	assertEqualInt(1, archive_pathmatch("abc[d]e]", "abcde]", 0));
139 	assertEqualInt(1, archive_pathmatch("abc[d\\]e]", "abc]", 0));
140 	assertEqualInt(0, archive_pathmatch("abc[d\\]e]", "abcd]e", 0));
141 	assertEqualInt(0, archive_pathmatch("abc[d]e]", "abc]", 0));
142 
143 	/* backslash-quoted chars can appear as either end of a range. */
144 	assertEqualInt(1, archive_pathmatch("abc[\\d-f]gh", "abcegh", 0));
145 	assertEqualInt(0, archive_pathmatch("abc[\\d-f]gh", "abcggh", 0));
146 	assertEqualInt(0, archive_pathmatch("abc[\\d-f]gh", "abc\\gh", 0));
147 	assertEqualInt(1, archive_pathmatch("abc[d-\\f]gh", "abcegh", 0));
148 	assertEqualInt(1, archive_pathmatch("abc[\\d-\\f]gh", "abcegh", 0));
149 	assertEqualInt(1, archive_pathmatch("abc[\\d-\\f]gh", "abcegh", 0));
150 	/* backslash-quoted '-' isn't special. */
151 	assertEqualInt(0, archive_pathmatch("abc[d\\-f]gh", "abcegh", 0));
152 	assertEqualInt(1, archive_pathmatch("abc[d\\-f]gh", "abc-gh", 0));
153 
154 	/* Leading '!' negates a character class. */
155 	assertEqualInt(0, archive_pathmatch("abc[!d]", "abcd", 0));
156 	assertEqualInt(1, archive_pathmatch("abc[!d]", "abce", 0));
157 	assertEqualInt(1, archive_pathmatch("abc[!d]", "abcc", 0));
158 	assertEqualInt(0, archive_pathmatch("abc[!d-z]", "abcq", 0));
159 	assertEqualInt(1, archive_pathmatch("abc[!d-gi-z]", "abch", 0));
160 	assertEqualInt(1, archive_pathmatch("abc[!fgijkl]", "abch", 0));
161 	assertEqualInt(0, archive_pathmatch("abc[!fghijkl]", "abch", 0));
162 
163 	/* Backslash quotes next character. */
164 	assertEqualInt(0, archive_pathmatch("abc\\[def]", "abc\\d", 0));
165 	assertEqualInt(1, archive_pathmatch("abc\\[def]", "abc[def]", 0));
166 	assertEqualInt(0, archive_pathmatch("abc\\\\[def]", "abc[def]", 0));
167 	assertEqualInt(0, archive_pathmatch("abc\\\\[def]", "abc\\[def]", 0));
168 	assertEqualInt(1, archive_pathmatch("abc\\\\[def]", "abc\\d", 0));
169 	assertEqualInt(1, archive_pathmatch("abcd\\", "abcd\\", 0));
170 	assertEqualInt(0, archive_pathmatch("abcd\\", "abcd\\[", 0));
171 	assertEqualInt(0, archive_pathmatch("abcd\\", "abcde", 0));
172 	assertEqualInt(0, archive_pathmatch("abcd\\[", "abcd\\", 0));
173 
174 	/*
175 	 * Because '.' and '/' have special meanings, we can
176 	 * identify many equivalent paths even if they're expressed
177 	 * differently.  (But quoting a character with '\\' suppresses
178 	 * special meanings!)
179 	 */
180 	assertEqualInt(0, archive_pathmatch("a/b/", "a/bc", 0));
181 	assertEqualInt(1, archive_pathmatch("a/./b", "a/b", 0));
182 	assertEqualInt(0, archive_pathmatch("a\\/./b", "a/b", 0));
183 	assertEqualInt(0, archive_pathmatch("a/\\./b", "a/b", 0));
184 	assertEqualInt(0, archive_pathmatch("a/.\\/b", "a/b", 0));
185 	assertEqualInt(0, archive_pathmatch("a\\/\\.\\/b", "a/b", 0));
186 	assertEqualInt(1, archive_pathmatch("./abc/./def/", "abc/def/", 0));
187 	assertEqualInt(1, archive_pathmatch("abc/def", "./././abc/./def", 0));
188 	assertEqualInt(1, archive_pathmatch("abc/def/././//", "./././abc/./def/", 0));
189 	assertEqualInt(1, archive_pathmatch(".////abc/.//def", "./././abc/./def", 0));
190 	assertEqualInt(1, archive_pathmatch("./abc?def/", "abc/def/", 0));
191 	failure("\"?./\" is not the same as \"/./\"");
192 	assertEqualInt(0, archive_pathmatch("./abc?./def/", "abc/def/", 0));
193 	failure("Trailing '/' should match no trailing '/'");
194 	assertEqualInt(1, archive_pathmatch("./abc/./def/", "abc/def", 0));
195 	failure("Trailing '/./' is still the same directory.");
196 	assertEqualInt(1, archive_pathmatch("./abc/./def/./", "abc/def", 0));
197 	failure("Trailing '/.' is still the same directory.");
198 	assertEqualInt(1, archive_pathmatch("./abc/./def/.", "abc/def", 0));
199 	assertEqualInt(1, archive_pathmatch("./abc/./def", "abc/def/", 0));
200 	failure("Trailing '/./' is still the same directory.");
201 	assertEqualInt(1, archive_pathmatch("./abc/./def", "abc/def/./", 0));
202 	failure("Trailing '/.' is still the same directory.");
203 	assertEqualInt(1, archive_pathmatch("./abc*/./def", "abc/def/.", 0));
204 
205 	/* Matches not anchored at beginning. */
206 	assertEqualInt(0,
207 	    archive_pathmatch("bcd", "abcd", PATHMATCH_NO_ANCHOR_START));
208 	assertEqualInt(1,
209 	    archive_pathmatch("abcd", "abcd", PATHMATCH_NO_ANCHOR_START));
210 	assertEqualInt(0,
211 	    archive_pathmatch("^bcd", "abcd", PATHMATCH_NO_ANCHOR_START));
212 	assertEqualInt(1,
213 	    archive_pathmatch("b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
214 	assertEqualInt(0,
215 	    archive_pathmatch("^b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
216 	assertEqualInt(0,
217 	    archive_pathmatch("/b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
218 	assertEqualInt(0,
219 	    archive_pathmatch("a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
220 	assertEqualInt(1,
221 	    archive_pathmatch("a/b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
222 	assertEqualInt(0,
223 	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
224 	assertEqualInt(0,
225 	    archive_pathmatch("^b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
226 
227 
228 	assertEqualInt(1,
229 	    archive_pathmatch("b/c/d", "a/b/c/d", PATHMATCH_NO_ANCHOR_START));
230 	assertEqualInt(1,
231 	    archive_pathmatch("b/c/d", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START));
232 
233 
234 	/* Matches not anchored at end. */
235 	assertEqualInt(0,
236 	    archive_pathmatch("bcd", "abcd", PATHMATCH_NO_ANCHOR_END));
237 	assertEqualInt(1,
238 	    archive_pathmatch("abcd", "abcd", PATHMATCH_NO_ANCHOR_END));
239 	assertEqualInt(1,
240 	    archive_pathmatch("abcd", "abcd/", PATHMATCH_NO_ANCHOR_END));
241 	assertEqualInt(1,
242 	    archive_pathmatch("abcd", "abcd/.", PATHMATCH_NO_ANCHOR_END));
243 	assertEqualInt(0,
244 	    archive_pathmatch("abc", "abcd", PATHMATCH_NO_ANCHOR_END));
245 	assertEqualInt(1,
246 	    archive_pathmatch("a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
247 	assertEqualInt(0,
248 	    archive_pathmatch("a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
249 	assertEqualInt(1,
250 	    archive_pathmatch("a/b/c$", "a/b/c", PATHMATCH_NO_ANCHOR_END));
251 	assertEqualInt(1,
252 	    archive_pathmatch("a/b/c$", "a/b/c/", PATHMATCH_NO_ANCHOR_END));
253 	assertEqualInt(1,
254 	    archive_pathmatch("a/b/c/", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
255 	assertEqualInt(0,
256 	    archive_pathmatch("a/b/c/$", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
257 	assertEqualInt(1,
258 	    archive_pathmatch("a/b/c/$", "a/b/c/", PATHMATCH_NO_ANCHOR_END));
259 	assertEqualInt(1,
260 	    archive_pathmatch("a/b/c/$", "a/b/c", PATHMATCH_NO_ANCHOR_END));
261 	assertEqualInt(0,
262 	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_END));
263 
264 	/* Matches not anchored at either end. */
265 	assertEqualInt(1,
266 	    archive_pathmatch("b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
267 	assertEqualInt(0,
268 	    archive_pathmatch("/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
269 	assertEqualInt(0,
270 	    archive_pathmatch("/a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
271 	assertEqualInt(1,
272 	    archive_pathmatch("/a/b/c", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
273 	assertEqualInt(0,
274 	    archive_pathmatch("/a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
275 	assertEqualInt(0,
276 	    archive_pathmatch("/a/b/c/d$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
277 	assertEqualInt(0,
278 	    archive_pathmatch("/a/b/c/d$", "/a/b/c/d/e", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
279 	assertEqualInt(1,
280 	    archive_pathmatch("/a/b/c/d$", "/a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
281 	assertEqualInt(1,
282 	    archive_pathmatch("^a/b/c", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
283 	assertEqualInt(0,
284 	    archive_pathmatch("^a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
285 	assertEqualInt(0,
286 	    archive_pathmatch("a/b/c$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
287 	assertEqualInt(1,
288 	    archive_pathmatch("b/c/d$", "a/b/c/d", PATHMATCH_NO_ANCHOR_START | PATHMATCH_NO_ANCHOR_END));
289 }
290