1 #include "types/uri.h"
2 
3 #include <errno.h>
4 #include <strings.h>
5 #include "rrdp/db/db_rrdp_uris.h"
6 #include "common.h"
7 #include "config.h"
8 #include "log.h"
9 #include "str_token.h"
10 
/* Kinds of URI this module accepts. */
enum rpki_uri_type {
	URI_RSYNC = 0,
	URI_HTTPS = 1,
};
16 
/* Scheme prefixes that global URIs are matched against. */
static const char *const PFX_RSYNC = "rsync://";
static const char *const PFX_HTTPS = "https://";
19 
20 /**
21  * Design notes:
22  *
 * Because we need to generate @local from @global, @global's allowed character
 * set must be a subset of @local's. Because this is Unix, @local must never
 * contain the NUL character (except as its terminator). Therefore, even though
 * IA5 allows NUL, @global won't.
27  *
28  * Because we will simply embed @global (minus "rsync://") into @local, @local's
29  * encoding must be IA5-compatible. In other words, UTF-16 and UTF-32 are out of
30  * the question.
31  *
32  * Aside from the reference counter, instances are meant to be immutable.
33  */
34 struct rpki_uri {
35 	/**
36 	 * "Global URI".
37 	 * The one that always starts with "rsync://" or "https://".
38 	 *
39 	 * These things are IA5-encoded, which means you're not bound to get
40 	 * non-ASCII characters.
41 	 */
42 	char *global;
43 	/** Length of @global. */
44 	size_t global_len;
45 
46 	/**
47 	 * "Local URI".
48 	 * The file pointed by @global, but cached in the local filesystem.
49 	 *
50 	 * I can't find a standard that defines this, but lots of complaints on
51 	 * the Internet imply that Unix file paths are specifically meant to be
52 	 * C strings.
53 	 *
54 	 * So just to clarify: This is a string that permits all characters,
55 	 * printable or otherwise, except \0. (Because that's the terminating
56 	 * character.)
57 	 *
58 	 * Even though it might contain characters that are non-printable
59 	 * according to ASCII, we assume that we can just dump it into the
60 	 * output without trouble, because the input should have the same
61 	 * encoding as the output.
62 	 */
63 	char *local;
64 	/* "local_len" is never needed right now. */
65 
66 	/* Type, currently rysnc and https are valid */
67 	enum rpki_uri_type type;
68 
69 	unsigned int references;
70 };
71 
/*
 * Checks that @character is printable ASCII (0x20 through 0x7E).
 *
 * @character is an int because callers sometimes hold signed chars and other
 * times unsigned chars. Any value outside of the range (negative ones
 * included) is rejected.
 *
 * Returns 0 if the character is acceptable. Otherwise reports a validation
 * error and returns whatever pr_val_err() returns.
 */
static int
validate_url_character(int character)
{
	/*
	 * RFCs 1738 and 3986 define a very specific set of allowed
	 * characters, but URL correctness is not really the concern here.
	 * (Proper URL validation takes more than a character check anyway.)
	 *
	 * The actual purpose of this check is to stay out of trouble later,
	 * when the global URI gets converted into a local file name.
	 *
	 * Sample trouble: UTF-8 characters. There is no guarantee that the
	 * system's file name encoding is UTF-8, and URIs are not supposed to
	 * contain UTF-8 in the first place, so there is no reason to deal with
	 * encoding conversion.
	 *
	 * To be fair, there is no guarantee that the file name encoding is
	 * ASCII-compatible either, but something has to be assumed. (Asking
	 * users for UTF-8 is fine; asking for something ASCII-compatible is a
	 * little better.)
	 *
	 * Hence: just require printable ASCII.
	 *
	 * TODO (next iteration) Consider exhaustive URL validation.
	 */
	if (character < 0x20 || 0x7E < character) {
		return pr_val_err("URL has non-printable character code '%d'.",
		    character);
	}

	return 0;
}
110 
111 /**
112  * Initializes @uri->global* by cloning @str.
113  * This function does not assume that @str is null-terminated.
114  */
115 static int
str2global(char const * str,size_t str_len,struct rpki_uri * uri)116 str2global(char const *str, size_t str_len, struct rpki_uri *uri)
117 {
118 	int error;
119 	size_t i;
120 
121 	for (i = 0; i < str_len; i++) {
122 		error = validate_url_character(str[i]);
123 		if (error)
124 			return error;
125 	}
126 
127 	uri->global = malloc(str_len + 1);
128 	if (uri->global == NULL)
129 		return pr_enomem();
130 	strncpy(uri->global, str, str_len);
131 	uri->global[str_len] = '\0';
132 	uri->global_len = str_len;
133 
134 	return 0;
135 }
136 
/*
 * Tells whether @chara may appear in a manifest file name (the period that
 * precedes the extension aside): ASCII letters, ASCII digits, '-' and '_'.
 */
static bool
is_valid_mft_file_chara(uint8_t chara)
{
	if (chara == '-' || chara == '_')
		return true;
	if (chara >= '0' && chara <= '9')
		return true;
	if (chara >= 'A' && chara <= 'Z')
		return true;

	return chara >= 'a' && chara <= 'z';
}
146 
147 /* RFC 6486bis, section 4.2.2 */
148 static int
validate_mft_file(IA5String_t * ia5)149 validate_mft_file(IA5String_t *ia5)
150 {
151 	size_t dot;
152 	size_t i;
153 
154 	if (ia5->size < 5)
155 		return pr_val_err("File name is too short (%zu < 5).", ia5->size);
156 	dot = ia5->size - 4;
157 	if (ia5->buf[dot] != '.')
158 		return pr_val_err("File name seems to lack a three-letter extension.");
159 
160 	for (i = 0; i < ia5->size; i++) {
161 		if (i != dot && !is_valid_mft_file_chara(ia5->buf[i])) {
162 			return pr_val_err("File name contains illegal character #%u",
163 			    ia5->buf[i]);
164 		}
165 	}
166 
167 	/*
168 	 * Actual extension doesn't matter; if there's no handler,
169 	 * we'll naturally ignore the file.
170 	 */
171 	return 0;
172 }
173 
174 /**
175  * Initializes @uri->global given manifest path @mft and its referenced file
176  * @ia5.
177  *
178  * ie. if @mft is "rsync://a/b/c.mft" and @ia5 is "d.cer", @uri->global will
179  * be "rsync://a/b/d.cer".
180  *
181  * Assumes that @mft is a "global" URL. (ie. extracted from rpki_uri.global.)
182  */
183 static int
ia5str2global(struct rpki_uri * uri,char const * mft,IA5String_t * ia5)184 ia5str2global(struct rpki_uri *uri, char const *mft, IA5String_t *ia5)
185 {
186 	char *joined;
187 	char *slash_pos;
188 	int dir_len;
189 	int error;
190 
191 	/*
192 	 * IA5String is a subset of ASCII. However, IA5String_t doesn't seem to
193 	 * be guaranteed to be NULL-terminated.
194 	 * `(char *) ia5->buf` is fair, but `strlen(ia5->buf)` is not.
195 	 */
196 
197 	error = validate_mft_file(ia5);
198 	if (error)
199 		return error;
200 
201 	slash_pos = strrchr(mft, '/');
202 	if (slash_pos == NULL)
203 		return pr_val_err("Manifest URL '%s' contains no slashes.", mft);
204 
205 	dir_len = (slash_pos + 1) - mft;
206 	joined = malloc(dir_len + ia5->size + 1);
207 	if (joined == NULL)
208 		return pr_enomem();
209 
210 	strncpy(joined, mft, dir_len);
211 	strncpy(joined + dir_len, (char *) ia5->buf, ia5->size);
212 	joined[dir_len + ia5->size] = '\0';
213 
214 	uri->global = joined;
215 	uri->global_len = dir_len + ia5->size;
216 	return 0;
217 }
218 
/*
 * Checks (case-insensitively) that @global starts with @uri_pfx.
 *
 * Returns 0 if it does. If it doesn't, the outcome depends on @error:
 * - zero: quietly returns -EINVAL.
 * - nonzero: reports a validation error and returns @error.
 */
static int
validate_uri_begin(char const *uri_pfx, const size_t uri_pfx_len,
    char const *global, size_t global_len, int error)
{
	bool matches;

	matches = (uri_pfx_len <= global_len)
	    && (strncasecmp(uri_pfx, global, uri_pfx_len) == 0);
	if (matches)
		return 0;

	if (error == 0)
		return -EINVAL;

	pr_val_err("Global URI '%s' does not begin with '%s'.",
	    global, uri_pfx);
	return error;
}
234 
235 static int
validate_gprefix(char const * global,size_t global_len,uint8_t flags,enum rpki_uri_type * type)236 validate_gprefix(char const *global, size_t global_len, uint8_t flags,
237     enum rpki_uri_type *type)
238 {
239 	size_t const PFX_RSYNC_LEN = strlen(PFX_RSYNC);
240 	size_t const PFX_HTTPS_LEN = strlen(PFX_HTTPS);
241 	uint8_t l_flags;
242 	int error;
243 
244 	/* Exclude RSYNC RRDP flag, isn't relevant here */
245 	l_flags = flags & ~URI_USE_RRDP_WORKSPACE;
246 
247 	if (l_flags == URI_VALID_RSYNC) {
248 		(*type) = URI_RSYNC;
249 		return validate_uri_begin(PFX_RSYNC, PFX_RSYNC_LEN, global,
250 		    global_len, ENOTRSYNC);
251 	}
252 	if (l_flags == URI_VALID_HTTPS) {
253 		(*type) = URI_HTTPS;
254 		return validate_uri_begin(PFX_HTTPS, PFX_HTTPS_LEN, global,
255 		    global_len, ENOTHTTPS);
256 	}
257 	if (l_flags != (URI_VALID_RSYNC | URI_VALID_HTTPS))
258 		pr_crit("Unknown URI flag");
259 
260 	/* It has both flags */
261 	error = validate_uri_begin(PFX_RSYNC, PFX_RSYNC_LEN, global, global_len,
262 	    0);
263 	if (!error) {
264 		(*type) = URI_RSYNC;
265 		return 0;
266 	}
267 	error = validate_uri_begin(PFX_HTTPS, PFX_HTTPS_LEN, global, global_len,
268 	    0);
269 	if (error) {
270 		pr_val_warn("URI '%s' does not begin with '%s' nor '%s'.",
271 		    global, PFX_RSYNC, PFX_HTTPS);
272 		return ENOTSUPPORTED;
273 	}
274 
275 	/* @size was already set */
276 	(*type) = URI_HTTPS;
277 	return 0;
278 }
279 
/*
 * Stores in @result a heap-allocated copy of the string returned by
 * db_rrdp_uris_workspace_get(), or NULL if that function returns NULL.
 * The caller is responsible for freeing the copy.
 *
 * Returns 0 on success, or pr_enomem()'s result if the copy cannot be
 * allocated (in which case @result is not written).
 */
static int
get_local_workspace(char **result)
{
	char const *current;
	char *copy;

	current = db_rrdp_uris_workspace_get();
	if (current != NULL) {
		copy = strdup(current);
		if (copy == NULL)
			return pr_enomem();
	} else {
		copy = NULL;
	}

	*result = copy;
	return 0;
}
299 
300 /**
301  * Initializes @uri->local by converting @uri->global.
302  *
303  * For example, given local cache repository "/tmp/rpki" and global uri
304  * "rsync://rpki.ripe.net/repo/manifest.mft", initializes @uri->local as
305  * "/tmp/rpki/rpki.ripe.net/repo/manifest.mft".
306  *
307  * By contract, if @guri is not RSYNC nor HTTPS, this will return ENOTRSYNC.
308  * This often should not be treated as an error; please handle gracefully.
309  */
310 static int
g2l(char const * global,size_t global_len,uint8_t flags,char ** result,enum rpki_uri_type * result_type)311 g2l(char const *global, size_t global_len, uint8_t flags, char **result,
312     enum rpki_uri_type *result_type)
313 {
314 	char *local;
315 	char *workspace;
316 	enum rpki_uri_type type;
317 	int error;
318 
319 	error = validate_gprefix(global, global_len, flags, &type);
320 	if (error)
321 		return error;
322 
323 	workspace = NULL;
324 	if ((flags & URI_USE_RRDP_WORKSPACE) != 0) {
325 		error = get_local_workspace(&workspace);
326 		if (error)
327 			return error;
328 	}
329 
330 	error = map_uri_to_local(global,
331 	    type == URI_RSYNC ? PFX_RSYNC : PFX_HTTPS,
332 	    workspace,
333 	    &local);
334 	if (error) {
335 		free(workspace);
336 		return error;
337 	}
338 
339 	free(workspace);
340 	*result = local;
341 	(*result_type) = type;
342 	return 0;
343 }
344 
345 static int
autocomplete_local(struct rpki_uri * uri,uint8_t flags)346 autocomplete_local(struct rpki_uri *uri, uint8_t flags)
347 {
348 	return g2l(uri->global, uri->global_len, flags, &uri->local,
349 	    &uri->type);
350 }
351 
352 static int
uri_create(struct rpki_uri ** result,uint8_t flags,void const * guri,size_t guri_len)353 uri_create(struct rpki_uri **result, uint8_t flags, void const *guri,
354     size_t guri_len)
355 {
356 	struct rpki_uri *uri;
357 	int error;
358 
359 	uri = malloc(sizeof(struct rpki_uri));
360 	if (uri == NULL)
361 		return pr_enomem();
362 
363 	error = str2global(guri, guri_len, uri);
364 	if (error) {
365 		free(uri);
366 		return error;
367 	}
368 
369 	error = autocomplete_local(uri, flags);
370 	if (error) {
371 		free(uri->global);
372 		free(uri);
373 		return error;
374 	}
375 
376 	uri->references = 1;
377 	*result = uri;
378 	return 0;
379 }
380 
381 int
uri_create_rsync_str_rrdp(struct rpki_uri ** uri,char const * guri,size_t guri_len)382 uri_create_rsync_str_rrdp(struct rpki_uri **uri, char const *guri,
383     size_t guri_len)
384 {
385 	return uri_create(uri, URI_VALID_RSYNC | URI_USE_RRDP_WORKSPACE, guri,
386 	    guri_len);
387 }
388 
389 int
uri_create_https_str_rrdp(struct rpki_uri ** uri,char const * guri,size_t guri_len)390 uri_create_https_str_rrdp(struct rpki_uri **uri, char const *guri,
391     size_t guri_len)
392 {
393 	return uri_create(uri, URI_VALID_HTTPS | URI_USE_RRDP_WORKSPACE, guri,
394 	    guri_len);
395 }
396 
397 int
uri_create_rsync_str(struct rpki_uri ** uri,char const * guri,size_t guri_len)398 uri_create_rsync_str(struct rpki_uri **uri, char const *guri, size_t guri_len)
399 {
400 	return uri_create(uri, URI_VALID_RSYNC, guri, guri_len);
401 }
402 
403 /*
404  * A URI that can be rsync or https.
405  *
406  * Return ENOTSUPPORTED if not an rsync or https URI.
407  */
408 int
uri_create_mixed_str(struct rpki_uri ** uri,char const * guri,size_t guri_len)409 uri_create_mixed_str(struct rpki_uri **uri, char const *guri, size_t guri_len)
410 {
411 	return uri_create(uri, URI_VALID_RSYNC | URI_VALID_HTTPS, guri,
412 	    guri_len);
413 }
414 
415 /*
416  * Manifest fileList entries are a little special in that they're just file
417  * names. This function will infer the rest of the URL.
418  */
419 int
uri_create_mft(struct rpki_uri ** result,struct rpki_uri * mft,IA5String_t * ia5,bool use_rrdp_workspace)420 uri_create_mft(struct rpki_uri **result, struct rpki_uri *mft, IA5String_t *ia5,
421     bool use_rrdp_workspace)
422 {
423 	struct rpki_uri *uri;
424 	uint8_t flags;
425 	int error;
426 
427 	uri = malloc(sizeof(struct rpki_uri));
428 	if (uri == NULL)
429 		return pr_enomem();
430 
431 	error = ia5str2global(uri, mft->global, ia5);
432 	if (error) {
433 		free(uri);
434 		return error;
435 	}
436 
437 	flags = URI_VALID_RSYNC;
438 	if (use_rrdp_workspace)
439 		flags |= URI_USE_RRDP_WORKSPACE;
440 
441 	error = autocomplete_local(uri, flags);
442 	if (error) {
443 		free(uri->global);
444 		free(uri);
445 		return error;
446 	}
447 
448 	uri->references = 1;
449 	*result = uri;
450 	return 0;
451 }
452 
453 /*
454  * Create @uri from the @ad, validating that the uri is of type(s) indicated
455  * at @flags (can be URI_VALID_RSYNC and/or URI_VALID_HTTPS)
456  */
457 int
uri_create_ad(struct rpki_uri ** uri,ACCESS_DESCRIPTION * ad,int flags)458 uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, int flags)
459 {
460 	ASN1_STRING *asn1_string;
461 	int type;
462 
463 	asn1_string = GENERAL_NAME_get0_value(ad->location, &type);
464 
465 	/*
466 	 * RFC 6487: "This extension MUST have an instance of an
467 	 * AccessDescription with an accessMethod of id-ad-rpkiManifest, (...)
468 	 * with an rsync URI [RFC5781] form of accessLocation."
469 	 *
470 	 * Ehhhhhh. It's a little annoying in that it seems to be stucking more
471 	 * than one requirement in a single sentence, which I think is rather
472 	 * rare for an RFC. Normally they tend to hammer things more.
473 	 *
474 	 * Does it imply that the GeneralName CHOICE is constrained to type
475 	 * "uniformResourceIdentifier"? I guess so, though I don't see anything
476 	 * stopping a few of the other types from also being capable of storing
477 	 * URIs.
478 	 *
479 	 * Also, nobody seems to be using the other types, and handling them
480 	 * would be a titanic pain in the ass. So this is what I'm committing
481 	 * to.
482 	 */
483 	if (type != GEN_URI) {
484 		pr_val_err("Unknown GENERAL_NAME type: %d", type);
485 		return ENOTSUPPORTED;
486 	}
487 
488 	/*
489 	 * GEN_URI signals an IA5String.
490 	 * IA5String is a subset of ASCII, so this cast is safe.
491 	 * No guarantees of a NULL chara, though.
492 	 *
493 	 * TODO (testers) According to RFC 5280, accessLocation can be an IRI
494 	 * somehow converted into URI form. I don't think that's an issue
495 	 * because the RSYNC clone operation should not have performed the
496 	 * conversion, so we should be looking at precisely the IA5String
497 	 * directory our g2l version of @asn1_string should contain.
498 	 * But ask the testers to keep an eye on it anyway.
499 	 */
500 	return uri_create(uri, flags,
501 	    ASN1_STRING_get0_data(asn1_string),
502 	    ASN1_STRING_length(asn1_string));
503 }
504 
505 void
uri_refget(struct rpki_uri * uri)506 uri_refget(struct rpki_uri *uri)
507 {
508 	uri->references++;
509 }
510 
511 void
uri_refput(struct rpki_uri * uri)512 uri_refput(struct rpki_uri *uri)
513 {
514 	uri->references--;
515 	if (uri->references == 0) {
516 		free(uri->global);
517 		free(uri->local);
518 		free(uri);
519 	}
520 }
521 
522 char const *
uri_get_global(struct rpki_uri * uri)523 uri_get_global(struct rpki_uri *uri)
524 {
525 	return uri->global;
526 }
527 
528 char const *
uri_get_local(struct rpki_uri * uri)529 uri_get_local(struct rpki_uri *uri)
530 {
531 	return uri->local;
532 }
533 
534 size_t
uri_get_global_len(struct rpki_uri * uri)535 uri_get_global_len(struct rpki_uri *uri)
536 {
537 	return uri->global_len;
538 }
539 
540 bool
uri_equals(struct rpki_uri * u1,struct rpki_uri * u2)541 uri_equals(struct rpki_uri *u1, struct rpki_uri *u2)
542 {
543 	return strcmp(u1->global, u2->global) == 0;
544 }
545 
546 /* @ext must include the period. */
547 bool
uri_has_extension(struct rpki_uri * uri,char const * ext)548 uri_has_extension(struct rpki_uri *uri, char const *ext)
549 {
550 	size_t ext_len;
551 	int cmp;
552 
553 	ext_len = strlen(ext);
554 	if (uri->global_len < ext_len)
555 		return false;
556 
557 	cmp = strncmp(uri->global + uri->global_len - ext_len, ext, ext_len);
558 	return cmp == 0;
559 }
560 
/* Tells whether @uri's global string ends with ".cer". */
bool
uri_is_certificate(struct rpki_uri *uri)
{
	bool const is_cer = uri_has_extension(uri, ".cer");

	return is_cer;
}
566 
567 bool
uri_is_rsync(struct rpki_uri * uri)568 uri_is_rsync(struct rpki_uri *uri)
569 {
570 	return uri->type == URI_RSYNC;
571 }
572 
/*
 * Returns the portion of @file_path that follows its last slash, or the whole
 * @file_path if it contains no slashes. The result points into @file_path.
 */
static char const *
get_filename(char const *file_path)
{
	char const *last_slash;

	last_slash = strrchr(file_path, '/');
	if (last_slash == NULL)
		return file_path;

	return last_slash + 1;
}
579 
580 static char const *
uri_get_printable(struct rpki_uri * uri,enum filename_format format)581 uri_get_printable(struct rpki_uri *uri, enum filename_format format)
582 {
583 	switch (format) {
584 	case FNF_GLOBAL:
585 		return uri->global;
586 	case FNF_LOCAL:
587 		return uri->local;
588 	case FNF_NAME:
589 		return get_filename(uri->global);
590 	}
591 
592 	pr_crit("Unknown file name format: %u", format);
593 	return NULL;
594 }
595 
/*
 * Returns @uri in the file name format given by
 * config_get_val_log_filename_format().
 */
char const *
uri_val_get_printable(struct rpki_uri *uri)
{
	return uri_get_printable(uri, config_get_val_log_filename_format());
}
604 
/*
 * Returns @uri in the file name format given by
 * config_get_op_log_filename_format().
 */
char const *
uri_op_get_printable(struct rpki_uri *uri)
{
	return uri_get_printable(uri, config_get_op_log_filename_format());
}
613