xref: /openbsd/usr.bin/rsync/rules.c (revision d9a51c35)
1 /*	$OpenBSD: rules.c,v 1.5 2022/12/26 19:16:02 jmc Exp $ */
2 /*
3  * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 #include <err.h>
18 #include <stdlib.h>
19 #include <stdio.h>
20 #include <string.h>
21 
22 #include "extern.h"
23 
24 struct rule {
25 	char			*pattern;
26 	enum rule_type		 type;
27 #ifdef NOTYET
28 	unsigned int		 modifiers;
29 #endif
30 	short			 numseg;
31 	unsigned char		 anchored;
32 	unsigned char		 fileonly;
33 	unsigned char		 nowild;
34 	unsigned char		 onlydir;
35 	unsigned char		 leadingdir;
36 };
37 
/* growable table of all parsed rules, kept in the order they were added */
static struct rule	*rules = NULL;
/* number of entries of rules[] that are in use */
static size_t		 numrules = 0;
/* number of entries rules[] has room for */
static size_t		 rulesz = 0;
41 
42 /* up to protocol 29 filter rules only support - + ! and no modifiers */
43 
44 const struct command {
45 	enum rule_type		type;
46 	char			sopt;
47 	const char		*lopt;
48 } commands[] = {
49 	{ RULE_EXCLUDE,		'-',	"exclude" },
50 	{ RULE_INCLUDE,		'+',	"include" },
51 	{ RULE_CLEAR,		'!',	"clear" },
52 #ifdef NOTYET
53 	{ RULE_MERGE,		'.',	"merge" },
54 	{ RULE_DIR_MERGE,	':',	"dir-merge" },
55 	{ RULE_SHOW,		'S',	"show" },
56 	{ RULE_HIDE,		'H',	"hide" },
57 	{ RULE_PROTECT,		'P',	"protect" },
58 	{ RULE_RISK,		'R',	"risk" },
59 #endif
60 	{ 0 }
61 };
62 
#ifdef NOTYET
/* bits stored in the modifiers member of struct rule */
#define MOD_ABSOLUTE			0x0001
#define MOD_NEGATE			0x0002
#define MOD_CVSEXCLUDE			0x0004
#define MOD_SENDING			0x0008
#define MOD_RECEIVING			0x0010
#define MOD_PERISHABLE			0x0020
#define MOD_XATTR			0x0040
#define MOD_MERGE_EXCLUDE		0x0080
#define MOD_MERGE_INCLUDE		0x0100
#define MOD_MERGE_CVSCOMPAT		0x0200
#define MOD_MERGE_EXCLUDE_FILE		0x0400
#define MOD_MERGE_NO_INHERIT		0x0800
#define MOD_MERGE_WORDSPLIT		0x1000

/*
 * Maps every modifier bit to the character that selects it in a rule.
 * The table ends with an all-zero entry.
 * maybe support absolute and negate
 */
const struct modifier {
	unsigned int		modifier;
	char			sopt;
} modifiers[] = {
	{ MOD_ABSOLUTE,			'/' },
	{ MOD_NEGATE,			'!' },
	{ MOD_CVSEXCLUDE,		'C' },
	{ MOD_SENDING,			's' },
	{ MOD_RECEIVING,		'r' },
	{ MOD_PERISHABLE,		'p' },
	{ MOD_XATTR,			'x' },
	/* for '.' and ':' types */
	{ MOD_MERGE_EXCLUDE,		'-' },
	{ MOD_MERGE_INCLUDE,		'+' },
	{ MOD_MERGE_CVSCOMPAT,		'C' },
	{ MOD_MERGE_EXCLUDE_FILE,	'e' },
	{ MOD_MERGE_NO_INHERIT,		'n' },
	{ MOD_MERGE_WORDSPLIT,		'w' },
	{ 0 }
};	/* the terminating ';' was missing, breaking NOTYET builds */
#endif
100 
101 static struct rule *
102 get_next_rule(void)
103 {
104 	struct rule *new;
105 	size_t newsz;
106 
107 	if (++numrules > rulesz) {
108 		if (rulesz == 0)
109 			newsz = 16;
110 		else
111 			newsz = rulesz * 2;
112 
113 		new = recallocarray(rules, rulesz, newsz, sizeof(*rules));
114 		if (new == NULL)
115 			err(ERR_NOMEM, NULL);
116 
117 		rules = new;
118 		rulesz = newsz;
119 	}
120 
121 	return rules + numrules - 1;
122 }
123 
124 static enum rule_type
125 parse_command(const char *command, size_t len)
126 {
127 	const char *mod;
128 	size_t	i;
129 
130 	mod = memchr(command, ',', len);
131 	if (mod != NULL) {
132 		/* XXX modifiers not yet implemented */
133 		return RULE_NONE;
134 	}
135 
136 	for (i = 0; commands[i].type != RULE_NONE; i++) {
137 		if (strncmp(commands[i].lopt, command, len) == 0)
138 			return commands[i].type;
139 		if (len == 1 && commands[i].sopt == *command)
140 			return commands[i].type;
141 	}
142 
143 	return RULE_NONE;
144 }
145 
146 static void
147 parse_pattern(struct rule *r, char *pattern)
148 {
149 	size_t plen;
150 	char *p;
151 	short nseg = 1;
152 
153 	/*
154 	 * check for / at start and end of pattern both are special and
155 	 * can bypass full path matching.
156 	 */
157 	if (*pattern == '/') {
158 		pattern++;
159 		r->anchored = 1;
160 	}
161 	plen = strlen(pattern);
162 	/*
163 	 * check for patterns ending in '/' and '/'+'***' and handle them
164 	 * specially. Because of this and the check above pattern will never
165 	 * start or end with a '/'.
166 	 */
167 	if (plen > 1 && pattern[plen - 1] == '/') {
168 		r->onlydir = 1;
169 		pattern[plen - 1] = '\0';
170 	}
171 	if (plen > 4 && strcmp(pattern + plen - 4, "/***") == 0) {
172 		r->leadingdir = 1;
173 		pattern[plen - 4] = '\0';
174 	}
175 
176 	/* count how many segments the pattern has. */
177 	for (p = pattern; *p != '\0'; p++)
178 		if (*p == '/')
179 			nseg++;
180 	r->numseg = nseg;
181 
182 	/* check if this pattern only matches against the basename */
183 	if (nseg == 1 && !r->anchored)
184 		r->fileonly = 1;
185 
186 	if (strpbrk(pattern, "*?[") == NULL) {
187 		/* no wildchar matching */
188 		r->nowild = 1;
189 	} else {
190 		/* requires wildchar matching */
191 		if (strstr(pattern, "**") != NULL)
192 			r->numseg = -1;
193 	}
194 
195 	r->pattern = strdup(pattern);
196 	if (r->pattern == NULL)
197 		err(ERR_NOMEM, NULL);
198 }
199 
200 int
201 parse_rule(char *line, enum rule_type def)
202 {
203 	enum rule_type type;
204 	struct rule *r;
205 	char *pattern;
206 	size_t len;
207 
208 	switch (*line) {
209 	case '#':
210 	case ';':
211 		/* comment */
212 		return 0;
213 	case '\0':
214 		/* ignore empty lines */
215 		return 0;
216 	default:
217 		len = strcspn(line, " _");
218 		type = parse_command(line, len);
219 		if (type == RULE_NONE) {
220 			if (def == RULE_NONE)
221 				return -1;
222 			type = def;
223 			pattern = line;
224 		} else
225 			pattern = line + len + 1;
226 
227 		if (*pattern == '\0' && type != RULE_CLEAR)
228 			return -1;
229 		if (*pattern != '\0' && type == RULE_CLEAR)
230 			return -1;
231 		break;
232 	}
233 
234 	r = get_next_rule();
235 	r->type = type;
236 	parse_pattern(r, pattern);
237 
238 	return 0;
239 }
240 
241 void
242 parse_file(const char *file, enum rule_type def)
243 {
244 	FILE *fp;
245 	char *line = NULL;
246 	size_t linesize = 0, linenum = 0;
247 	ssize_t linelen;
248 
249 	if ((fp = fopen(file, "r")) == NULL)
250 		err(ERR_SYNTAX, "open: %s", file);
251 
252 	while ((linelen = getline(&line, &linesize, fp)) != -1) {
253 		linenum++;
254 		line[linelen - 1] = '\0';
255 		if (parse_rule(line, def) == -1)
256 			errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
257 			    file, linenum);
258 	}
259 
260 	free(line);
261 	if (ferror(fp))
262 		err(ERR_SYNTAX, "failed to parse file %s", file);
263 	fclose(fp);
264 }
265 
266 static const char *
267 send_command(struct rule *r)
268 {
269 	static char buf[16];
270 	char *b = buf;
271 	char *ep = buf + sizeof(buf);
272 
273 	switch (r->type) {
274 	case RULE_EXCLUDE:
275 		*b++ = '-';
276 		break;
277 	case RULE_INCLUDE:
278 		*b++ = '+';
279 		break;
280 	case RULE_CLEAR:
281 		*b++ = '!';
282 		break;
283 #ifdef NOTYET
284 	case RULE_MERGE:
285 		*b++ = '.';
286 		break;
287 	case RULE_DIR_MERGE:
288 		*b++ = ':';
289 		break;
290 	case RULE_SHOW:
291 		*b++ = 'S';
292 		break;
293 	case RULE_HIDE:
294 		*b++ = 'H';
295 		break;
296 	case RULE_PROTECT:
297 		*b++ = 'P';
298 		break;
299 	case RULE_RISK:
300 		*b++ = 'R';
301 		break;
302 #endif
303 	default:
304 		err(ERR_SYNTAX, "unknown rule type %d", r->type);
305 	}
306 
307 #ifdef NOTYET
308 	for (i = 0; modifiers[i].modifier != 0; i++) {
309 		if (rule->modifiers & modifiers[i].modifier)
310 			*b++ = modifiers[i].sopt;
311 		if (b >= ep - 3)
312 			err(ERR_SYNTAX, "rule modifiers overflow");
313 	}
314 #endif
315 	if (b >= ep - 3)
316 		err(ERR_SYNTAX, "rule prefix overflow");
317 	*b++ = ' ';
318 
319 	/* include the stripped root '/' for anchored patterns */
320 	if (r->anchored)
321 		*b++ = '/';
322 	*b++ = '\0';
323 	return buf;
324 }
325 
326 static const char *
327 postfix_command(struct rule *r)
328 {
329 	static char buf[8];
330 
331 	buf[0] = '\0';
332 	if (r->onlydir)
333 		strlcpy(buf, "/", sizeof(buf));
334 	if (r->leadingdir)
335 		strlcpy(buf, "/***", sizeof(buf));
336 
337 	return buf;
338 }
339 
340 void
341 send_rules(struct sess *sess, int fd)
342 {
343 	const char *cmd;
344 	const char *postfix;
345 	struct rule *r;
346 	size_t cmdlen, len, postlen, i;
347 
348 	for (i = 0; i < numrules; i++) {
349 		r = &rules[i];
350 		cmd = send_command(r);
351 		if (cmd == NULL)
352 			err(ERR_PROTOCOL,
353 			    "rules are incompatible with remote rsync");
354 		postfix = postfix_command(r);
355 		cmdlen = strlen(cmd);
356 		len = strlen(r->pattern);
357 		postlen = strlen(postfix);
358 
359 		if (!io_write_int(sess, fd, cmdlen + len + postlen))
360 			err(ERR_SOCK_IO, "send rules");
361 		if (!io_write_buf(sess, fd, cmd, cmdlen))
362 			err(ERR_SOCK_IO, "send rules");
363 		if (!io_write_buf(sess, fd, r->pattern, len))
364 			err(ERR_SOCK_IO, "send rules");
365 		/* include the '/' stripped by onlydir */
366 		if (postlen > 0)
367 			if (!io_write_buf(sess, fd, postfix, postlen))
368 				err(ERR_SOCK_IO, "send rules");
369 	}
370 
371 	if (!io_write_int(sess, fd, 0))
372 		err(ERR_SOCK_IO, "send rules");
373 }
374 
375 void
376 recv_rules(struct sess *sess, int fd)
377 {
378 	char line[8192];
379 	size_t len;
380 
381 	do {
382 		if (!io_read_size(sess, fd, &len))
383 			err(ERR_SOCK_IO, "receive rules");
384 
385 		if (len == 0)
386 			return;
387 		if (len >= sizeof(line) - 1)
388 			errx(ERR_SOCK_IO, "received rule too long");
389 		if (!io_read_buf(sess, fd, line, len))
390 			err(ERR_SOCK_IO, "receive rules");
391 		line[len] = '\0';
392 		if (parse_rule(line, RULE_NONE) == -1)
393 			errx(ERR_PROTOCOL, "syntax error in received rules");
394 	} while (1);
395 }
396 
397 static inline int
398 rule_matched(struct rule *r)
399 {
400 	/* TODO apply negation once modifiers are added */
401 
402 	if (r->type == RULE_EXCLUDE)
403 		return -1;
404 	else
405 		return 1;
406 }
407 
408 int
409 rules_match(const char *path, int isdir)
410 {
411 	const char *basename, *p = NULL;
412 	struct rule *r;
413 	size_t i;
414 
415 	basename = strrchr(path, '/');
416 	if (basename != NULL)
417 		basename += 1;
418 	else
419 		basename = path;
420 
421 	for (i = 0; i < numrules; i++) {
422 		r = &rules[i];
423 
424 		if (r->onlydir && !isdir)
425 			continue;
426 
427 		if (r->nowild) {
428 			/* fileonly and anchored are mutually exclusive */
429 			if (r->fileonly) {
430 				if (strcmp(basename, r->pattern) == 0)
431 					return rule_matched(r);
432 			} else if (r->anchored) {
433 				/*
434 				 * assumes that neither path nor pattern
435 				 * start with a '/'.
436 				 */
437 				if (strcmp(path, r->pattern) == 0)
438 					return rule_matched(r);
439 			} else if (r->leadingdir) {
440 				size_t plen = strlen(r->pattern);
441 
442 				p = strstr(path, r->pattern);
443 				/*
444 				 * match from start or dir boundary also
445 				 * match to end or to dir boundary
446 				 */
447 				if (p != NULL && (p == path || p[-1] == '/') &&
448 				    (p[plen] == '\0' || p[plen] == '/'))
449 					return rule_matched(r);
450 			} else {
451 				size_t len = strlen(path);
452 				size_t plen = strlen(r->pattern);
453 
454 				if (len >= plen && strcmp(path + len - plen,
455 				    r->pattern) == 0) {
456 					/* match all or start on dir boundary */
457 					if (len == plen ||
458 					    path[len - plen - 1] == '/')
459 						return rule_matched(r);
460 				}
461 			}
462 		} else {
463 			if (r->fileonly) {
464 				p = basename;
465 			} else if (r->anchored || r->numseg == -1) {
466 				/* full path matching */
467 				p = path;
468 			} else {
469 				short nseg = 1;
470 
471 				/* match against the last numseg elements */
472 				for (p = path; *p != '\0'; p++)
473 					if (*p == '/')
474 						nseg++;
475 				if (nseg < r->numseg) {
476 					p = NULL;
477 				} else {
478 					nseg -= r->numseg;
479 					for (p = path; *p != '\0' && nseg > 0;
480 					    p++) {
481 						if (*p == '/')
482 							nseg--;
483 					}
484 				}
485 			}
486 
487 			if (p != NULL) {
488 				if (rmatch(r->pattern, p, r->leadingdir) == 0)
489 					return rule_matched(r);
490 			}
491 		}
492 	}
493 
494 	return 0;
495 }
496