1 #include "types/uri.h"
2
3 #include <errno.h>
4 #include <strings.h>
5 #include "rrdp/db/db_rrdp_uris.h"
6 #include "common.h"
7 #include "config.h"
8 #include "log.h"
9 #include "str_token.h"
10
/*
 * Schemes a URI handled by this module can have.
 * The scheme is told apart by the prefix of the global URI.
 */
enum rpki_uri_type {
	/* The global URI begins with "rsync://". */
	URI_RSYNC,
	/* The global URI begins with "https://". */
	URI_HTTPS,
};
16
/* Scheme prefixes that an accepted global URI is required to begin with. */
static const char *const PFX_RSYNC = "rsync://";
static const char *const PFX_HTTPS = "https://";
19
20 /**
21 * Design notes:
22 *
23 * Because we need to generate @local from @global, @global's allowed character
24 * set must be a subset of @local. Because this is Unix, @local must never
25 * contain NULL (except as a terminating character). Therefore, even though IA5
26 * allows NULL, @global won't.
27 *
28 * Because we will simply embed @global (minus "rsync://") into @local, @local's
29 * encoding must be IA5-compatible. In other words, UTF-16 and UTF-32 are out of
30 * the question.
31 *
32 * Aside from the reference counter, instances are meant to be immutable.
33 */
34 struct rpki_uri {
35 /**
36 * "Global URI".
37 * The one that always starts with "rsync://" or "https://".
38 *
39 * These things are IA5-encoded, which means you're not bound to get
40 * non-ASCII characters.
41 */
42 char *global;
43 /** Length of @global. */
44 size_t global_len;
45
46 /**
47 * "Local URI".
48 * The file pointed by @global, but cached in the local filesystem.
49 *
50 * I can't find a standard that defines this, but lots of complaints on
51 * the Internet imply that Unix file paths are specifically meant to be
52 * C strings.
53 *
54 * So just to clarify: This is a string that permits all characters,
55 * printable or otherwise, except \0. (Because that's the terminating
56 * character.)
57 *
58 * Even though it might contain characters that are non-printable
59 * according to ASCII, we assume that we can just dump it into the
60 * output without trouble, because the input should have the same
61 * encoding as the output.
62 */
63 char *local;
64 /* "local_len" is never needed right now. */
65
66 /* Type, currently rysnc and https are valid */
67 enum rpki_uri_type type;
68
69 unsigned int references;
70 };
71
/*
 * Checks whether @character is allowed to show up in a global URI.
 * Returns 0 if it is; otherwise logs a validation error and returns whatever
 * pr_val_err() returned.
 *
 * @character is an int because callers feed us signed chars sometimes, and
 * unsigned chars some other times. As an int, any byte outside of the accepted
 * range (whether it arrives as a negative number or as something above 0x7E)
 * simply fails the range check below; no casting is needed.
 */
static int
validate_url_character(int character)
{
	/*
	 * RFCs 1738 and 3986 define a very specific set of allowed characters,
	 * but URL correctness is not really the concern here. (Proper URL
	 * validation involves more than checking for legal characters,
	 * anyway.)
	 *
	 * The point of this check is to prevent trouble later, when the global
	 * URI is converted into a local file name.
	 *
	 * Example of trouble: UTF-8 characters. There is no guarantee that the
	 * system's file name encoding is UTF-8, and URIs are not supposed to
	 * contain UTF-8 in the first place, so there is no reason to get into
	 * encoding conversions.
	 *
	 * In fairness, there is no guarantee that the file name encoding is
	 * ASCII-compatible either, but SOMETHING has to be assumed. (Asking
	 * users for UTF-8 is fine, but asking them for anything
	 * ASCII-compatible is a little better.)
	 *
	 * Therefore: only printable ASCII is accepted.
	 *
	 * TODO (next iteration) Consider exhaustive URL validation.
	 */
	if (character < 0x20 || 0x7E < character)
		return pr_val_err("URL has non-printable character code '%d'.",
		    character);

	return 0;
}
110
111 /**
112 * Initializes @uri->global* by cloning @str.
113 * This function does not assume that @str is null-terminated.
114 */
115 static int
str2global(char const * str,size_t str_len,struct rpki_uri * uri)116 str2global(char const *str, size_t str_len, struct rpki_uri *uri)
117 {
118 int error;
119 size_t i;
120
121 for (i = 0; i < str_len; i++) {
122 error = validate_url_character(str[i]);
123 if (error)
124 return error;
125 }
126
127 uri->global = malloc(str_len + 1);
128 if (uri->global == NULL)
129 return pr_enomem();
130 strncpy(uri->global, str, str_len);
131 uri->global[str_len] = '\0';
132 uri->global_len = str_len;
133
134 return 0;
135 }
136
/*
 * Returns whether @chara can be part of a manifest file name: an ASCII
 * letter, a decimal digit, a hyphen or an underscore.
 * (The period is not included; the caller deals with it separately.)
 */
static bool
is_valid_mft_file_chara(uint8_t chara)
{
	if (chara == '-' || chara == '_')
		return true;
	if ('0' <= chara && chara <= '9')
		return true;
	if ('A' <= chara && chara <= 'Z')
		return true;

	return 'a' <= chara && chara <= 'z';
}
146
147 /* RFC 6486bis, section 4.2.2 */
148 static int
validate_mft_file(IA5String_t * ia5)149 validate_mft_file(IA5String_t *ia5)
150 {
151 size_t dot;
152 size_t i;
153
154 if (ia5->size < 5)
155 return pr_val_err("File name is too short (%zu < 5).", ia5->size);
156 dot = ia5->size - 4;
157 if (ia5->buf[dot] != '.')
158 return pr_val_err("File name seems to lack a three-letter extension.");
159
160 for (i = 0; i < ia5->size; i++) {
161 if (i != dot && !is_valid_mft_file_chara(ia5->buf[i])) {
162 return pr_val_err("File name contains illegal character #%u",
163 ia5->buf[i]);
164 }
165 }
166
167 /*
168 * Actual extension doesn't matter; if there's no handler,
169 * we'll naturally ignore the file.
170 */
171 return 0;
172 }
173
174 /**
175 * Initializes @uri->global given manifest path @mft and its referenced file
176 * @ia5.
177 *
178 * ie. if @mft is "rsync://a/b/c.mft" and @ia5 is "d.cer", @uri->global will
179 * be "rsync://a/b/d.cer".
180 *
181 * Assumes that @mft is a "global" URL. (ie. extracted from rpki_uri.global.)
182 */
183 static int
ia5str2global(struct rpki_uri * uri,char const * mft,IA5String_t * ia5)184 ia5str2global(struct rpki_uri *uri, char const *mft, IA5String_t *ia5)
185 {
186 char *joined;
187 char *slash_pos;
188 int dir_len;
189 int error;
190
191 /*
192 * IA5String is a subset of ASCII. However, IA5String_t doesn't seem to
193 * be guaranteed to be NULL-terminated.
194 * `(char *) ia5->buf` is fair, but `strlen(ia5->buf)` is not.
195 */
196
197 error = validate_mft_file(ia5);
198 if (error)
199 return error;
200
201 slash_pos = strrchr(mft, '/');
202 if (slash_pos == NULL)
203 return pr_val_err("Manifest URL '%s' contains no slashes.", mft);
204
205 dir_len = (slash_pos + 1) - mft;
206 joined = malloc(dir_len + ia5->size + 1);
207 if (joined == NULL)
208 return pr_enomem();
209
210 strncpy(joined, mft, dir_len);
211 strncpy(joined + dir_len, (char *) ia5->buf, ia5->size);
212 joined[dir_len + ia5->size] = '\0';
213
214 uri->global = joined;
215 uri->global_len = dir_len + ia5->size;
216 return 0;
217 }
218
/*
 * Checks (case-insensitively) that @global begins with @uri_pfx.
 *
 * Returns 0 if it does. If it doesn't, the outcome depends on @error:
 * - If @error is zero, nothing is logged and -EINVAL is returned.
 * - Otherwise a validation error is logged and @error is returned.
 */
static int
validate_uri_begin(char const *uri_pfx, const size_t uri_pfx_len,
    char const *global, size_t global_len, int error)
{
	bool has_prefix;

	has_prefix = (uri_pfx_len <= global_len)
	    && (strncasecmp(uri_pfx, global, uri_pfx_len) == 0);
	if (has_prefix)
		return 0;

	/* The caller asked for a silent check. */
	if (error == 0)
		return -EINVAL;

	pr_val_err("Global URI '%s' does not begin with '%s'.",
	    global, uri_pfx);
	return error;
}
234
235 static int
validate_gprefix(char const * global,size_t global_len,uint8_t flags,enum rpki_uri_type * type)236 validate_gprefix(char const *global, size_t global_len, uint8_t flags,
237 enum rpki_uri_type *type)
238 {
239 size_t const PFX_RSYNC_LEN = strlen(PFX_RSYNC);
240 size_t const PFX_HTTPS_LEN = strlen(PFX_HTTPS);
241 uint8_t l_flags;
242 int error;
243
244 /* Exclude RSYNC RRDP flag, isn't relevant here */
245 l_flags = flags & ~URI_USE_RRDP_WORKSPACE;
246
247 if (l_flags == URI_VALID_RSYNC) {
248 (*type) = URI_RSYNC;
249 return validate_uri_begin(PFX_RSYNC, PFX_RSYNC_LEN, global,
250 global_len, ENOTRSYNC);
251 }
252 if (l_flags == URI_VALID_HTTPS) {
253 (*type) = URI_HTTPS;
254 return validate_uri_begin(PFX_HTTPS, PFX_HTTPS_LEN, global,
255 global_len, ENOTHTTPS);
256 }
257 if (l_flags != (URI_VALID_RSYNC | URI_VALID_HTTPS))
258 pr_crit("Unknown URI flag");
259
260 /* It has both flags */
261 error = validate_uri_begin(PFX_RSYNC, PFX_RSYNC_LEN, global, global_len,
262 0);
263 if (!error) {
264 (*type) = URI_RSYNC;
265 return 0;
266 }
267 error = validate_uri_begin(PFX_HTTPS, PFX_HTTPS_LEN, global, global_len,
268 0);
269 if (error) {
270 pr_val_warn("URI '%s' does not begin with '%s' nor '%s'.",
271 global, PFX_RSYNC, PFX_HTTPS);
272 return ENOTSUPPORTED;
273 }
274
275 /* @size was already set */
276 (*type) = URI_HTTPS;
277 return 0;
278 }
279
/*
 * Stores in @result a heap-allocated copy of the string returned by
 * db_rrdp_uris_workspace_get(), or NULL if that function returned NULL.
 * The caller is responsible for releasing the copy.
 *
 * Returns 0 on success, or pr_enomem()'s result if the copy cannot be
 * allocated (in which case @result is not written).
 */
static int
get_local_workspace(char **result)
{
	char const *workspace;
	char *copy;

	copy = NULL;
	workspace = db_rrdp_uris_workspace_get();
	if (workspace != NULL) {
		copy = strdup(workspace);
		if (copy == NULL)
			return pr_enomem();
	}

	*result = copy;
	return 0;
}
299
300 /**
301 * Initializes @uri->local by converting @uri->global.
302 *
303 * For example, given local cache repository "/tmp/rpki" and global uri
304 * "rsync://rpki.ripe.net/repo/manifest.mft", initializes @uri->local as
305 * "/tmp/rpki/rpki.ripe.net/repo/manifest.mft".
306 *
307 * By contract, if @guri is not RSYNC nor HTTPS, this will return ENOTRSYNC.
308 * This often should not be treated as an error; please handle gracefully.
309 */
310 static int
g2l(char const * global,size_t global_len,uint8_t flags,char ** result,enum rpki_uri_type * result_type)311 g2l(char const *global, size_t global_len, uint8_t flags, char **result,
312 enum rpki_uri_type *result_type)
313 {
314 char *local;
315 char *workspace;
316 enum rpki_uri_type type;
317 int error;
318
319 error = validate_gprefix(global, global_len, flags, &type);
320 if (error)
321 return error;
322
323 workspace = NULL;
324 if ((flags & URI_USE_RRDP_WORKSPACE) != 0) {
325 error = get_local_workspace(&workspace);
326 if (error)
327 return error;
328 }
329
330 error = map_uri_to_local(global,
331 type == URI_RSYNC ? PFX_RSYNC : PFX_HTTPS,
332 workspace,
333 &local);
334 if (error) {
335 free(workspace);
336 return error;
337 }
338
339 free(workspace);
340 *result = local;
341 (*result_type) = type;
342 return 0;
343 }
344
345 static int
autocomplete_local(struct rpki_uri * uri,uint8_t flags)346 autocomplete_local(struct rpki_uri *uri, uint8_t flags)
347 {
348 return g2l(uri->global, uri->global_len, flags, &uri->local,
349 &uri->type);
350 }
351
352 static int
uri_create(struct rpki_uri ** result,uint8_t flags,void const * guri,size_t guri_len)353 uri_create(struct rpki_uri **result, uint8_t flags, void const *guri,
354 size_t guri_len)
355 {
356 struct rpki_uri *uri;
357 int error;
358
359 uri = malloc(sizeof(struct rpki_uri));
360 if (uri == NULL)
361 return pr_enomem();
362
363 error = str2global(guri, guri_len, uri);
364 if (error) {
365 free(uri);
366 return error;
367 }
368
369 error = autocomplete_local(uri, flags);
370 if (error) {
371 free(uri->global);
372 free(uri);
373 return error;
374 }
375
376 uri->references = 1;
377 *result = uri;
378 return 0;
379 }
380
381 int
uri_create_rsync_str_rrdp(struct rpki_uri ** uri,char const * guri,size_t guri_len)382 uri_create_rsync_str_rrdp(struct rpki_uri **uri, char const *guri,
383 size_t guri_len)
384 {
385 return uri_create(uri, URI_VALID_RSYNC | URI_USE_RRDP_WORKSPACE, guri,
386 guri_len);
387 }
388
389 int
uri_create_https_str_rrdp(struct rpki_uri ** uri,char const * guri,size_t guri_len)390 uri_create_https_str_rrdp(struct rpki_uri **uri, char const *guri,
391 size_t guri_len)
392 {
393 return uri_create(uri, URI_VALID_HTTPS | URI_USE_RRDP_WORKSPACE, guri,
394 guri_len);
395 }
396
397 int
uri_create_rsync_str(struct rpki_uri ** uri,char const * guri,size_t guri_len)398 uri_create_rsync_str(struct rpki_uri **uri, char const *guri, size_t guri_len)
399 {
400 return uri_create(uri, URI_VALID_RSYNC, guri, guri_len);
401 }
402
403 /*
404 * A URI that can be rsync or https.
405 *
406 * Return ENOTSUPPORTED if not an rsync or https URI.
407 */
408 int
uri_create_mixed_str(struct rpki_uri ** uri,char const * guri,size_t guri_len)409 uri_create_mixed_str(struct rpki_uri **uri, char const *guri, size_t guri_len)
410 {
411 return uri_create(uri, URI_VALID_RSYNC | URI_VALID_HTTPS, guri,
412 guri_len);
413 }
414
415 /*
416 * Manifest fileList entries are a little special in that they're just file
417 * names. This function will infer the rest of the URL.
418 */
419 int
uri_create_mft(struct rpki_uri ** result,struct rpki_uri * mft,IA5String_t * ia5,bool use_rrdp_workspace)420 uri_create_mft(struct rpki_uri **result, struct rpki_uri *mft, IA5String_t *ia5,
421 bool use_rrdp_workspace)
422 {
423 struct rpki_uri *uri;
424 uint8_t flags;
425 int error;
426
427 uri = malloc(sizeof(struct rpki_uri));
428 if (uri == NULL)
429 return pr_enomem();
430
431 error = ia5str2global(uri, mft->global, ia5);
432 if (error) {
433 free(uri);
434 return error;
435 }
436
437 flags = URI_VALID_RSYNC;
438 if (use_rrdp_workspace)
439 flags |= URI_USE_RRDP_WORKSPACE;
440
441 error = autocomplete_local(uri, flags);
442 if (error) {
443 free(uri->global);
444 free(uri);
445 return error;
446 }
447
448 uri->references = 1;
449 *result = uri;
450 return 0;
451 }
452
453 /*
454 * Create @uri from the @ad, validating that the uri is of type(s) indicated
455 * at @flags (can be URI_VALID_RSYNC and/or URI_VALID_HTTPS)
456 */
457 int
uri_create_ad(struct rpki_uri ** uri,ACCESS_DESCRIPTION * ad,int flags)458 uri_create_ad(struct rpki_uri **uri, ACCESS_DESCRIPTION *ad, int flags)
459 {
460 ASN1_STRING *asn1_string;
461 int type;
462
463 asn1_string = GENERAL_NAME_get0_value(ad->location, &type);
464
465 /*
466 * RFC 6487: "This extension MUST have an instance of an
467 * AccessDescription with an accessMethod of id-ad-rpkiManifest, (...)
468 * with an rsync URI [RFC5781] form of accessLocation."
469 *
470 * Ehhhhhh. It's a little annoying in that it seems to be stucking more
471 * than one requirement in a single sentence, which I think is rather
472 * rare for an RFC. Normally they tend to hammer things more.
473 *
474 * Does it imply that the GeneralName CHOICE is constrained to type
475 * "uniformResourceIdentifier"? I guess so, though I don't see anything
476 * stopping a few of the other types from also being capable of storing
477 * URIs.
478 *
479 * Also, nobody seems to be using the other types, and handling them
480 * would be a titanic pain in the ass. So this is what I'm committing
481 * to.
482 */
483 if (type != GEN_URI) {
484 pr_val_err("Unknown GENERAL_NAME type: %d", type);
485 return ENOTSUPPORTED;
486 }
487
488 /*
489 * GEN_URI signals an IA5String.
490 * IA5String is a subset of ASCII, so this cast is safe.
491 * No guarantees of a NULL chara, though.
492 *
493 * TODO (testers) According to RFC 5280, accessLocation can be an IRI
494 * somehow converted into URI form. I don't think that's an issue
495 * because the RSYNC clone operation should not have performed the
496 * conversion, so we should be looking at precisely the IA5String
497 * directory our g2l version of @asn1_string should contain.
498 * But ask the testers to keep an eye on it anyway.
499 */
500 return uri_create(uri, flags,
501 ASN1_STRING_get0_data(asn1_string),
502 ASN1_STRING_length(asn1_string));
503 }
504
505 void
uri_refget(struct rpki_uri * uri)506 uri_refget(struct rpki_uri *uri)
507 {
508 uri->references++;
509 }
510
511 void
uri_refput(struct rpki_uri * uri)512 uri_refput(struct rpki_uri *uri)
513 {
514 uri->references--;
515 if (uri->references == 0) {
516 free(uri->global);
517 free(uri->local);
518 free(uri);
519 }
520 }
521
522 char const *
uri_get_global(struct rpki_uri * uri)523 uri_get_global(struct rpki_uri *uri)
524 {
525 return uri->global;
526 }
527
528 char const *
uri_get_local(struct rpki_uri * uri)529 uri_get_local(struct rpki_uri *uri)
530 {
531 return uri->local;
532 }
533
534 size_t
uri_get_global_len(struct rpki_uri * uri)535 uri_get_global_len(struct rpki_uri *uri)
536 {
537 return uri->global_len;
538 }
539
540 bool
uri_equals(struct rpki_uri * u1,struct rpki_uri * u2)541 uri_equals(struct rpki_uri *u1, struct rpki_uri *u2)
542 {
543 return strcmp(u1->global, u2->global) == 0;
544 }
545
546 /* @ext must include the period. */
547 bool
uri_has_extension(struct rpki_uri * uri,char const * ext)548 uri_has_extension(struct rpki_uri *uri, char const *ext)
549 {
550 size_t ext_len;
551 int cmp;
552
553 ext_len = strlen(ext);
554 if (uri->global_len < ext_len)
555 return false;
556
557 cmp = strncmp(uri->global + uri->global_len - ext_len, ext, ext_len);
558 return cmp == 0;
559 }
560
/* Returns whether @uri's global form ends with ".cer". */
bool
uri_is_certificate(struct rpki_uri *uri)
{
	static char const *const CER_EXTENSION = ".cer";

	return uri_has_extension(uri, CER_EXTENSION);
}
566
567 bool
uri_is_rsync(struct rpki_uri * uri)568 uri_is_rsync(struct rpki_uri *uri)
569 {
570 return uri->type == URI_RSYNC;
571 }
572
/*
 * Returns the portion of @file_path that follows its last slash, or the whole
 * @file_path if it contains no slashes.
 * The result points into @file_path; nothing is allocated.
 */
static char const *
get_filename(char const *file_path)
{
	char const *last_slash;

	last_slash = strrchr(file_path, '/');
	if (last_slash == NULL)
		return file_path;

	return last_slash + 1;
}
579
580 static char const *
uri_get_printable(struct rpki_uri * uri,enum filename_format format)581 uri_get_printable(struct rpki_uri *uri, enum filename_format format)
582 {
583 switch (format) {
584 case FNF_GLOBAL:
585 return uri->global;
586 case FNF_LOCAL:
587 return uri->local;
588 case FNF_NAME:
589 return get_filename(uri->global);
590 }
591
592 pr_crit("Unknown file name format: %u", format);
593 return NULL;
594 }
595
/*
 * Returns @uri in the file name format configured for validation logs.
 */
char const *
uri_val_get_printable(struct rpki_uri *uri)
{
	return uri_get_printable(uri, config_get_val_log_filename_format());
}
604
/*
 * Returns @uri in the file name format configured for operation logs.
 */
char const *
uri_op_get_printable(struct rpki_uri *uri)
{
	return uri_get_printable(uri, config_get_op_log_filename_format());
}
613