1 /*
2  * Copyright(c) 2014-2018 Tim Ruehsen
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  *
22  * This file is part of libpsl.
23  *
24  * Public Suffix List routines
25  *
26  * Changelog
27  * 19.03.2014  Tim Ruehsen  created from libmget/cookie.c
28  *
29  */
30 
31 #if HAVE_CONFIG_H
32 # include <config.h>
33 #endif
34 
/*
 * GCC_VERSION_AT_LEAST(major, minor) is non-zero when __GNUC__ and
 * __GNUC_MINOR__ are defined and describe a version >= major.minor.
 * It is 0 when those macros are not defined.
 */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#       define GCC_VERSION_AT_LEAST(major, minor) ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#       define GCC_VERSION_AT_LEAST(major, minor) 0
#endif

/*
 * PSL_UNUSED is appended to parameters that may be unused in some build
 * configurations. It expands to nothing when the attribute is not available.
 */
#if GCC_VERSION_AT_LEAST(2,95)
#  define PSL_UNUSED __attribute__ ((unused))
#else
#  define PSL_UNUSED
#endif
46 
47 #include <sys/types.h>
48 #include <sys/stat.h>
49 
50 #ifdef _WIN32
51 # include <winsock2.h>
52 # include <ws2tcpip.h>
53 #else
54 # include <sys/socket.h>
55 # include <netinet/in.h>
56 # include <unistd.h>
57 #endif
58 
59 #if defined(_MSC_VER) && ! defined(ssize_t)
60 # include <basetsd.h>
61 typedef SSIZE_T ssize_t;
62 #endif
63 
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <string.h>
67 #ifdef HAVE_STRINGS_H
68 # include <strings.h>
69 #endif
70 #include <ctype.h>
71 #include <time.h>
72 #include <errno.h>
73 #include <limits.h> /* for UINT_MAX */
74 
75 #ifdef HAVE_NL_LANGINFO
76 # include <langinfo.h>
77 #endif
78 
79 #ifndef _WIN32
80 # include <arpa/inet.h>
81 #endif
82 
83 #ifdef HAVE_ALLOCA_H
84 #	include <alloca.h>
85 #endif
86 
87 #ifdef WITH_LIBICU
88 #	include <unicode/uversion.h>
89 #	include <unicode/ustring.h>
90 #	include <unicode/uidna.h>
91 #	include <unicode/ucnv.h>
92 #elif defined(WITH_LIBIDN2)
93 #	include <iconv.h>
94 #	include <idn2.h>
95 #	include <unicase.h>
96 #	include <unistr.h>
97 #elif defined(WITH_LIBIDN)
98 #	include <iconv.h>
99 #	include <stringprep.h>
100 #	include <idna.h>
101 #	include <unicase.h>
102 #	include <unistr.h>
103 #endif
104 
105 #ifndef WINICONV_CONST
106 #  define WINICONV_CONST
107 #endif
108 
109 #include <libpsl.h>
110 
111 /**
112  * SECTION:libpsl
113  * @short_description: Public Suffix List library functions
114  * @title: libpsl
115  * @stability: Stable
116  * @include: libpsl.h
117  *
118  * [Public Suffix List](https://publicsuffix.org/) library functions.
119  *
120  */
121 
/* number of elements of an array (only meaningful for real arrays, not pointers) */
#define countof(a) (sizeof(a)/sizeof(*(a)))

/* flag bits of a rule, kept in psl_entry_t.flags and also tested on DAFSA lookup results */
#define PRIV_PSL_FLAG_EXCEPTION (1<<0) /* a matching rule with this bit means "not a public suffix" */
#define PRIV_PSL_FLAG_WILDCARD  (1<<1) /* rule also matches one additional leading label */
#define PRIV_PSL_FLAG_ICANN     (1<<2) /* entry of ICANN section */
#define PRIV_PSL_FLAG_PRIVATE   (1<<3) /* entry of PRIVATE section */
#define PRIV_PSL_FLAG_PLAIN     (1<<4) /* just used for PSL syntax checking */
129 
/* A single suffix rule: its text plus the data used for sorting and matching. */
typedef struct {
	char label_buf[48];    /* inline storage for the rule text (NUL-terminated) */
	const char *label;     /* rule text; points to label_buf or to caller-owned memory */
	unsigned short length; /* length of the rule text */
	unsigned char nlabels; /* number of labels */
	unsigned char flags;   /* PRIV_PSL_FLAG_* bits */
} psl_entry_t;
141 
142 /* stripped down version libmget vector routines */
143 typedef struct {
144 	int
145 		(*cmp)(const psl_entry_t **, const psl_entry_t **); /* comparison function */
146 	psl_entry_t
147 		**entry; /* pointer to array of pointers to elements */
148 	int
149 		max,     /* allocated elements */
150 		cur;     /* number of elements in use */
151 } psl_vector_t;
152 
153 struct psl_ctx_st {
154 	psl_vector_t
155 		*suffixes;
156 	unsigned char
157 		*dafsa;
158 	size_t
159 		dafsa_size;
160 	int
161 		nsuffixes,
162 		nexceptions,
163 		nwildcards;
164 	unsigned
165 		utf8 : 1; /* 1: data contains UTF-8 + punycode encoded rules */
166 };
167 
/* include the PSL data generated by psl-make-dafsa */
#if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
#include "suffixes_dafsa.h"
#else
/* no generator configured: define empty placeholders for the names otherwise provided by the generated header */
static const unsigned char kDafsa[] = "";
static time_t _psl_file_time = 0;
static int _psl_nsuffixes = 0;
static int _psl_nexceptions = 0;
static int _psl_nwildcards = 0;
static const char _psl_sha1_checksum[] = "";
static const char _psl_filename[] = "";
#endif

/* references to these PSLs will result in lookups to built-in data */
/* (the object is identified by its address; lookups for it use kDafsa) */
static const psl_ctx_t
	builtin_psl;

/* file name of the distribution's PSL file, or "" when PSL_DISTFILE is not defined */
#ifdef PSL_DISTFILE
static const char _psl_dist_filename[] = PSL_DISTFILE;
#else
static const char _psl_dist_filename[] = "";
#endif
190 
vector_alloc(int max,int (* cmp)(const psl_entry_t **,const psl_entry_t **))191 static psl_vector_t *vector_alloc(int max, int (*cmp)(const psl_entry_t **, const psl_entry_t **))
192 {
193 	psl_vector_t *v;
194 
195 	if (!(v = calloc(1, sizeof(psl_vector_t))))
196 		return NULL;
197 
198 	if (!(v->entry = malloc(max * sizeof(psl_entry_t *)))) {
199 		free(v);
200 		return NULL;
201 	}
202 
203 	v->max = max;
204 	v->cmp = cmp;
205 	return v;
206 }
207 
/*
 * Release a vector together with all entries it owns.
 *
 * @v: address of the vector pointer; NULL and pointers to NULL are accepted.
 *
 * After the call *v is NULL, so the caller is not left with a dangling
 * pointer and a repeated call is harmless.
 */
static void vector_free(psl_vector_t **v)
{
	psl_vector_t *vec;
	int it;

	if (!v || !*v)
		return;

	vec = *v;

	if (vec->entry) {
		/* every entry was allocated individually */
		for (it = 0; it < vec->cur; it++)
			free(vec->entry[it]);

		free(vec->entry);
	}

	free(vec);
	*v = NULL;
}
222 
vector_get(const psl_vector_t * v,int pos)223 static psl_entry_t *vector_get(const psl_vector_t *v, int pos)
224 {
225 	if (pos < 0 || !v || pos >= v->cur) return NULL;
226 
227 	return v->entry[pos];
228 }
229 
230 /* the entries must be sorted by */
vector_find(const psl_vector_t * v,const psl_entry_t * elem)231 static int vector_find(const psl_vector_t *v, const psl_entry_t *elem)
232 {
233 	if (v) {
234 		int l, r, m;
235 		int res;
236 
237 		/* binary search for element (exact match) */
238 		for (l = 0, r = v->cur - 1; l <= r;) {
239 			m = (l + r) / 2;
240 			if ((res = v->cmp(&elem, (const psl_entry_t **)&(v->entry[m]))) > 0) l = m + 1;
241 			else if (res < 0) r = m - 1;
242 			else return m;
243 		}
244 	}
245 
246 	return -1; /* not found */
247 }
248 
/*
 * Append a heap-allocated copy of 'elem' to the vector.
 *
 * The pointer array is doubled when it is full. The new capacity is only
 * stored in the vector after realloc() succeeded, so a failed call leaves
 * the vector unchanged and still consistent.
 *
 * Returns the index of the new entry, or -1 on error (NULL argument,
 * capacity overflow or out of memory).
 */
static int vector_add(psl_vector_t *v, const psl_entry_t *elem)
{
	psl_entry_t *copy;

	if (!v || !elem)
		return -1;

	if (v->cur >= v->max) {
		psl_entry_t **grown;
		int new_max;

		if (v->max > INT_MAX / 2)
			return -1; /* doubling would overflow */

		/* a vector created with a capacity of 0 must still be able to grow */
		new_max = v->max > 0 ? v->max * 2 : 1;

		if ((size_t)new_max > (size_t)-1 / sizeof(*grown))
			return -1; /* byte count would overflow */

		if (!(grown = realloc(v->entry, new_max * sizeof(*grown))))
			return -1; /* v->entry and v->max are still valid */

		v->entry = grown;
		v->max = new_max;
	}

	if (!(copy = malloc(sizeof(*copy))))
		return -1;

	memcpy(copy, elem, sizeof(*copy));

	v->entry[v->cur] = copy;
	return v->cur++;
}
276 
/*
 * Sort the entry pointers with the vector's comparison function.
 * Does nothing if 'v' is NULL or has no comparison function.
 */
static void vector_sort(psl_vector_t *v)
{
	/* the element size is that of one entry pointer, taken from the array itself */
	if (v && v->cmp)
		qsort(v->entry, v->cur, sizeof(*v->entry), (int(*)(const void *, const void *))v->cmp);
}
282 
283 /* by this kind of sorting, we can easily see if a domain matches or not */
suffix_compare(const psl_entry_t * s1,const psl_entry_t * s2)284 static int suffix_compare(const psl_entry_t *s1, const psl_entry_t *s2)
285 {
286 	int n;
287 
288 	if ((n = s2->nlabels - s1->nlabels))
289 		return n; /* most labels first */
290 
291 	if ((n = s1->length - s2->length))
292 		return n;  /* shorter rules first */
293 
294 	return strcmp(s1->label ? s1->label : s1->label_buf, s2->label ? s2->label : s2->label_buf);
295 }
296 
297 /* needed to sort array of pointers, given to qsort() */
suffix_compare_array(const psl_entry_t ** s1,const psl_entry_t ** s2)298 static int suffix_compare_array(const psl_entry_t **s1, const psl_entry_t **s2)
299 {
300 	return suffix_compare(*s1, *s2);
301 }
302 
/*
 * Initialize 'suffix' from the rule text 'rule'.
 *
 * @suffix: entry to fill; label is pointed at the entry's own label_buf
 * @rule:   rule text
 * @length: length of the rule text
 *
 * The text is copied into label_buf and the labels (separated by '.')
 * are counted. At most 'length' characters are copied, so the bounds
 * check below really protects label_buf, even if 'rule' is longer than
 * 'length' says.
 *
 * Returns 0 on success, -1 if the rule does not fit into label_buf
 * (nlabels is set to 0 in that case).
 */
static int suffix_init(psl_entry_t *suffix, const char *rule, size_t length)
{
	size_t i;

	suffix->label = suffix->label_buf;

	if (length >= sizeof(suffix->label_buf) - 1) {
		suffix->nlabels = 0; /* rule too long, ignored */
		return -1;
	}

	suffix->length = (unsigned short)length;
	suffix->nlabels = 1;

	for (i = 0; i < length && rule[i]; i++) {
		if (rule[i] == '.')
			suffix->nlabels++;
		suffix->label_buf[i] = rule[i];
	}
	suffix->label_buf[i] = 0;

	return 0;
}
329 
330 #if !defined(WITH_LIBIDN) && !defined(WITH_LIBIDN2) && !defined(WITH_LIBICU)
331 /*
332  * When configured without runtime IDNA support (./configure --disable-runtime), we need a pure ASCII
333  * representation of non-ASCII characters in labels as found in UTF-8 domain names.
334  * This is because the current DAFSA format used may only hold character values [21..127].
335  *
336   Code copied from http://www.nicemice.net/idn/punycode-spec.gz on
337   2011-01-04 with SHA-1 a966a8017f6be579d74a50a226accc7607c40133
338   labeled punycode-spec 1.0.3 (2006-Mar-24-Thu).  It is modified for
339   libpsl by Tim Rühsen.  License on the original code:
340 
341   punycode-spec 1.0.3 (2006-Mar-23-Thu)
342   http://www.nicemice.net/idn/
343   Adam M. Costello
344   http://www.nicemice.net/amc/
345 
346   B. Disclaimer and license
347 
348     Regarding this entire document or any portion of it (including
349     the pseudocode and C code), the author makes no guarantees and
350     is not responsible for any damage resulting from its use.  The
351     author grants irrevocable permission to anyone to use, modify,
352     and distribute it in any way that does not diminish the rights
353     of anyone else to use, modify, and distribute it, provided that
354     redistributed derivative works do not contain misleading author or
355     version information.  Derivative works need not be licensed under
356     similar terms.
357 
358   C. Punycode sample implementation
359 
360   punycode-sample.c 2.0.0 (2004-Mar-21-Sun)
361   http://www.nicemice.net/idn/
362   Adam M. Costello
363   http://www.nicemice.net/amc/
364 
365   This is ANSI C code (C89) implementing Punycode 1.0.x.
366  */
/* result codes of the Punycode encoder; enumerators are numbered 0..3 in declaration order */
enum punycode_status {
	punycode_success,    /* 0: no error */
	punycode_bad_input,  /* 1: Input is invalid. */
	punycode_big_output, /* 2: Output would exceed the space provided. */
	punycode_overflow    /* 3: Wider integers needed to process input. */
};
373 
/*
 * punycode_uint: unsigned integer type used by the Punycode code below.
 * PUNYCODE_UINT overrides the choice; otherwise unsigned int is taken if
 * UINT_MAX is at least (1 << 26) - 1, else unsigned long.
 */
#ifdef PUNYCODE_UINT
	typedef PUNYCODE_UINT punycode_uint;
#elif UINT_MAX >= (1 << 26) - 1
	typedef unsigned int punycode_uint;
#else
	typedef unsigned long punycode_uint;
#endif
381 
/*** Bootstring parameters for Punycode ***/
enum {
	base = 36,
	tmin = 1,
	tmax = 26,
	skew = 38,
	damp = 700,
	initial_bias = 72,
	initial_n = 0x80,
	delimiter = 0x2D /* ASCII '-' */
};
387 
/*
 * Map a digit value to its ASCII character:
 *    0..25 map to ASCII a..z
 *   26..35 map to ASCII 0..9
 */
static char encode_digit(punycode_uint d)
{
	/* 97 is 'a'; 22 is '0' - 26 */
	punycode_uint offset = d < 26 ? 97 : 22;

	return d + offset;
}
/* true if bcp is in 65..90 (ASCII 'A'..'Z'); values below 65 wrap around in the unsigned subtraction */
#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26)
/* largest value of punycode_uint (-1 converted to the unsigned type) */
static const punycode_uint maxint = -1;
396 
/*
 * Bias adaptation function of the Punycode encoder.
 *
 * @delta:     delta that was just encoded
 * @numpoints: divisor for the second scaling step (must not be 0)
 * @firsttime: non-zero to scale delta by 'damp', else delta is halved
 *
 * Returns the new bias.
 */
static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime)
{
	punycode_uint k = 0;

	if (firsttime)
		delta /= damp;
	else
		delta >>= 1; /* a faster way of doing delta / 2 */

	delta += delta / numpoints;

	while (delta > ((base - tmin) * tmax) / 2) {
		delta /= base - tmin;
		k += base;
	}

	return k + (base - tmin + 1) * delta / (delta + skew);
}
411 
/*
 * Encode a sequence of code points as Punycode.
 *
 * @input_length_orig: number of code points in input[]
 * @input:             the code points to encode
 * @output_length:     in: number of chars available in output[];
 *                     out: number of chars written (only set on success)
 * @output:            receives the encoded chars; this function does not
 *                     write a terminating 0 byte
 *
 * Returns punycode_success, or punycode_big_output / punycode_overflow
 * on failure.
 */
static enum punycode_status punycode_encode(
	size_t input_length_orig,
	const punycode_uint input[],
	size_t *output_length,
	char output[])
{
	punycode_uint input_length, n, delta, h, b, bias, j, m, q, k, t;
	size_t out, max_out;

	/* The Punycode spec assumes that the input length is the same type */
	/* of integer as a code point, so we need to convert the size_t to  */
	/* a punycode_uint, which could overflow.                           */

	if (input_length_orig > maxint)
		return punycode_overflow;

	input_length = (punycode_uint) input_length_orig;

	/* Initialize the state: */

	n = initial_n;
	delta = 0;
	out = 0;
	max_out = *output_length;
	bias = initial_bias;

	/* Handle the basic code points: */
	for (j = 0; j < input_length; ++j) {
		if (input[j] < 0x80) {
			/* keep one char in reserve for the delimiter written below */
			if (max_out - out < 2)
				return punycode_big_output;
			output[out++] = (char) input[j];
		}
		/* else if (input[j] < n) return punycode_bad_input; */
		/* (not needed for Punycode with unsigned code points) */
	}

	h = b = (punycode_uint) out;
	/* cannot overflow because out <= input_length <= maxint */

	/* h is the number of code points that have been handled, b is the  */
	/* number of basic code points, and out is the number of ASCII code */
	/* points that have been output.                                    */

	if (b > 0)
		output[out++] = delimiter;

	/* Main encoding loop: */

	while (h < input_length) {
		/* All non-basic code points < n have been     */
		/* handled already.  Find the next larger one: */

		for (m = maxint, j = 0; j < input_length; ++j) {
			/* if (basic(input[j])) continue; */
			/* (not needed for Punycode) */
			if (input[j] >= n && input[j] < m)
				m = input[j];
		}

		/* Increase delta enough to advance the decoder's    */
		/* <n,i> state to <m,0>, but guard against overflow: */

		if (m - n > (maxint - delta) / (h + 1))
			return punycode_overflow;
		delta += (m - n) * (h + 1);
		n = m;

		for (j = 0; j < input_length; ++j) {
			/* Punycode does not need to check whether input[j] is basic: */
			if (input[j] < n /* || basic(input[j]) */) {
				if (++delta == 0)
					return punycode_overflow;
			}

			if (input[j] == n) {
				/* Represent delta as a generalized variable-length integer: */

				for (q = delta, k = base;; k += base) {
					/* this check also covers the final digit written after the loop */
					if (out >= max_out)
						return punycode_big_output;
					t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
						k >= bias + tmax ? tmax : k - bias;
					if (q < t)
						break;
					output[out++] = encode_digit(t + (q - t) % (base - t));
					q = (q - t) / (base - t);
				}

				output[out++] = encode_digit(q);
				bias = adapt(delta, h + 1, h == b);
				delta = 0;
				++h;
			}
		}

		++delta, ++n;
	}

	*output_length = out;
	return punycode_success;
}
514 
/*
 * Decode UTF-8 into an array of code points.
 *
 * @in:     UTF-8 input (need not be 0-terminated)
 * @inlen:  number of bytes in 'in'
 * @out:    receives the decoded code points
 * @outlen: number of elements in 'out'; one element is kept in reserve,
 *          so at most outlen - 1 code points are stored
 *
 * Decoding stops at the end of the input or when the usable part of
 * 'out' is full, whichever comes first.
 *
 * Returns the number of code points stored, or -1 if 'outlen' is 0 or
 * the input contains a malformed or truncated sequence.
 */
static ssize_t utf8_to_utf32(const char *in, size_t inlen, punycode_uint *out, size_t outlen)
{
	size_t n = 0;
	const unsigned char *s = (const unsigned char *)in;
	const unsigned char *e = s + inlen;

	if (!outlen)
		return -1;

	outlen--;

	while (n < outlen) {
		size_t inleft = e - s;

		if (!inleft)
			break; /* input consumed */

		if ((*s & 0x80) == 0) { /* 0xxxxxxx ASCII char */
			out[n++] = *s;
			s++;
		} else if (inleft >= 2 && (*s & 0xE0) == 0xC0) { /* 110xxxxx 10xxxxxx */
			if ((s[1] & 0xC0) != 0x80)
				return -1;
			out[n++] = ((*s & 0x1F) << 6) | (s[1] & 0x3F);
			s += 2;
		} else if (inleft >= 3 && (*s & 0xF0) == 0xE0) { /* 1110xxxx 10xxxxxx 10xxxxxx */
			if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80)
				return -1;
			out[n++] = ((*s & 0x0F) << 12) | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);
			s += 3;
		} else if (inleft >= 4 && (*s & 0xF8) == 0xF0) { /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
			if ((s[1] & 0xC0) != 0x80 || (s[2] & 0xC0) != 0x80 || (s[3] & 0xC0) != 0x80)
				return -1;
			/* each of the three continuation bytes contributes its own 6 bits */
			out[n++] = ((*s & 0x07) << 18) | ((s[1] & 0x3F) << 12) | ((s[2] & 0x3F) << 6) | (s[3] & 0x3F);
			s += 4;
		} else
			return -1; /* invalid lead byte or truncated sequence */
	}

	return n;
}
555 
/* Return 1 if none of the first n bytes of s has a value >= 128, else 0. */
static int mem_is_ascii(const char *s, size_t n)
{
	const unsigned char *bytes = (const unsigned char *)s;
	size_t i;

	for (i = 0; i < n; i++) {
		if (bytes[i] & 0x80)
			return 0;
	}

	return 1;
}
564 
domain_to_punycode(const char * domain,char * out,size_t outsize)565 static int domain_to_punycode(const char *domain, char *out, size_t outsize)
566 {
567 	size_t outlen = 0, labellen;
568 	punycode_uint input[256];
569 	const char *label, *e;
570 
571 	for (e = label = domain; e; label = e + 1) {
572 		e = strchr(label, '.');
573 		labellen = e ? (size_t) (e - label) : strlen(label);
574 		/* printf("s=%s inlen=%zd\n", label, labellen); */
575 
576 		if (mem_is_ascii(label, labellen)) {
577 			if (outlen + labellen + (e != NULL) >= outsize)
578 				return 1;
579 
580 			/* printf("outlen=%zd labellen=%zd\n", outlen, labellen); */
581 			memcpy(out + outlen, label, labellen);
582 			outlen += labellen;
583 		} else {
584 			ssize_t inputlen = 0;
585 
586 			if (outlen + labellen + (e != NULL) + 4 >= outsize)
587 				return 1;
588 
589 			if ((inputlen = utf8_to_utf32(label, labellen, input, countof(input))) < 0)
590 				return 1;
591 
592 			memcpy(out + outlen, "xn--", 4);
593 			outlen += 4;
594 
595 			labellen = outsize - outlen;
596 			/* printf("n=%zd space_left=%zd\n", n, labellen); */
597 			if (punycode_encode(inputlen, input, &labellen, out + outlen))
598 				return 1;
599 			outlen += labellen;
600 		}
601 
602 		if (e)
603 			out[outlen++] = '.';
604 		out[outlen] = 0;
605 	}
606 
607 	return 0;
608 }
609 #endif
610 
/* Return 1 if c is a space, tab, carriage return or newline, else 0. */
static int isspace_ascii(const char c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\r':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
615 
/* Return 1 if the 0-terminated string s contains no byte >= 128, else 0. */
static int str_is_ascii(const char *s)
{
	const unsigned char *p;

	for (p = (const unsigned char *)s; *p; p++) {
		if (*p >= 128)
			return 0;
	}

	return 1;
}
622 
623 #if defined(WITH_LIBIDN)
/*
 * Work around a libidn <= 1.30 vulnerability.
 *
 * The function checks for a valid UTF-8 character sequence before
 * passing it to idna_to_ascii_8z().
 *
 * [1] https://lists.gnu.org/archive/html/help-libidn/2015-05/msg00002.html
 * [2] https://lists.gnu.org/archive/html/bug-wget/2015-06/msg00002.html
 * [3] https://curl.haxx.se/mail/lib-2015-06/0143.html
 *
 * Returns 1 if every lead byte is followed by the expected number of
 * continuation bytes (10xxxxxx), else 0.
 */
static int utf8_is_valid(const char *utf8)
{
	const unsigned char *p = (const unsigned char *) utf8;

	while (*p) {
		int trail, i;

		if ((*p & 0x80) == 0)
			trail = 0; /* 0xxxxxxx ASCII char */
		else if ((*p & 0xE0) == 0xC0)
			trail = 1; /* 110xxxxx 10xxxxxx */
		else if ((*p & 0xF0) == 0xE0)
			trail = 2; /* 1110xxxx 10xxxxxx 10xxxxxx */
		else if ((*p & 0xF8) == 0xF0)
			trail = 3; /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		else
			return 0;

		/* stops at the first bad byte, so the terminating 0 is never passed */
		for (i = 1; i <= trail; i++) {
			if ((p[i] & 0xC0) != 0x80)
				return 0;
		}

		p += trail + 1;
	}

	return 1;
}
659 #endif
660 
/* handle type for the IDNA converter */
typedef void *psl_idna_t;

/*
 * Open an IDNA converter handle.
 *
 * With libicu this is a UTS#46 instance. All other configurations
 * don't need a handle and return NULL.
 */
static psl_idna_t *psl_idna_open(void)
{
#if defined(WITH_LIBICU)
	UErrorCode status = 0;

	return (void *)uidna_openUTS46(UIDNA_USE_STD3_RULES | UIDNA_NONTRANSITIONAL_TO_ASCII, &status);
#else
	return NULL;
#endif
}
671 
/*
 * Close a handle returned by psl_idna_open(). NULL is accepted.
 * Without libicu there is nothing to release.
 */
static void psl_idna_close(psl_idna_t *idna PSL_UNUSED)
{
#if defined(WITH_LIBICU)
	if (!idna)
		return;

	uidna_close((UIDNA *)idna);
#endif
}
679 
/*
 * Convert the UTF-8 domain name 'utf8' to its ASCII (punycode) form.
 *
 * @idna:  handle from psl_idna_open(); only used by the libicu code path
 * @utf8:  0-terminated UTF-8 input
 * @ascii: receives a newly allocated string that the caller has to free
 *
 * Which conversion is used depends on the build: libicu, libidn2, libidn
 * or, without any of these, the built-in domain_to_punycode().
 *
 * Returns 0 on success, -1 on failure.
 *
 * Note: with @ascii == NULL the libicu path still returns 0, while the
 * built-in path returns -1.
 */
static int psl_idna_toASCII(psl_idna_t *idna PSL_UNUSED, const char *utf8, char **ascii)
{
	int ret = -1;

#if defined(WITH_LIBICU)
	/* IDNA2008 UTS#46 punycode conversion */
	if (idna) {
		char lookupname_buf[128] = "", *lookupname = lookupname_buf;
		UErrorCode status = 0;
		UIDNAInfo info = UIDNA_INFO_INITIALIZER;
		UChar utf16_dst[128], utf16_src_buf[128];
		UChar *utf16_src = utf16_src_buf;
		int32_t utf16_src_length, bytes_written;
		int32_t utf16_dst_length;

		/* NOTE(review): if ICU reports a too-small destination as an error status, */
		/* the malloc() fallbacks below are only reached when the result exactly    */
		/* fills the stack buffer - confirm against the ICU documentation.          */
		u_strFromUTF8(utf16_src, countof(utf16_src_buf), &utf16_src_length, utf8, -1, &status);
		if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */

		if (utf16_src_length >= (int) countof(utf16_src_buf)) {
			utf16_src = malloc((utf16_src_length + 1) * sizeof(UChar));
			if (!utf16_src) goto cleanup;

			u_strFromUTF8(utf16_src, utf16_src_length, NULL, utf8, -1, &status);
			if (!U_SUCCESS(status)) goto cleanup; /* UTF-8 to UTF-16 conversion failed */

			utf16_src[utf16_src_length] = 0; /* u_strFromUTF8() doesn't 0-terminate if dest is filled up */
		}

		utf16_dst_length = uidna_nameToASCII((UIDNA *)idna, utf16_src, utf16_src_length, utf16_dst, countof(utf16_dst), &info, &status);
		if (!U_SUCCESS(status)) goto cleanup; /* to ASCII conversion failed */

		u_strToUTF8(lookupname, sizeof(lookupname_buf), &bytes_written, utf16_dst, utf16_dst_length, &status);
		if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */

		if (bytes_written >= (int) sizeof(lookupname_buf)) {
			lookupname = malloc(bytes_written + 1);
			if (!lookupname) goto cleanup;

			u_strToUTF8(lookupname, bytes_written, NULL, utf16_dst, utf16_dst_length, &status);
			if (!U_SUCCESS(status)) goto cleanup; /* UTF-16 to UTF-8 conversion failed */

			lookupname[bytes_written] = 0; /* u_strToUTF8() doesn't 0-terminate if dest is filled up */
		} else {
			/* the result sits in the stack buffer: hand out a heap copy */
			if (!(lookupname = strdup(lookupname)))
				goto cleanup;
		}

		if (ascii) {
			/* ownership moves to the caller, so cleanup must not free it */
			*ascii = lookupname;
			lookupname = NULL;
		}

		ret = 0;

cleanup:
		/* free only what was allocated on the heap */
		if (lookupname != lookupname_buf)
			free(lookupname);
		if (utf16_src != utf16_src_buf)
			free(utf16_src);
	}
#elif defined(WITH_LIBIDN2)
#if IDN2_VERSION_NUMBER >= 0x00140000
	int rc;

	/* IDN2_TRANSITIONAL automatically converts to lowercase
	 * IDN2_NFC_INPUT converts to NFC before toASCII conversion
	 * Since IDN2_TRANSITIONAL implicitly does NFC conversion, we don't need
	 * the additional IDN2_NFC_INPUT. But just for the unlikely case that the linked
	 * library is not matching the headers when building and it doesn't support TR46,
	 * we provide IDN2_NFC_INPUT. */
	/* NOTE(review): the comment above talks about IDN2_TRANSITIONAL, the call
	 * below passes IDN2_NONTRANSITIONAL - confirm which one is intended. */

	if ((rc = idn2_lookup_u8((uint8_t *)utf8, (uint8_t **)ascii, IDN2_NFC_INPUT | IDN2_NONTRANSITIONAL)) == IDN2_OK)
		ret = 0;
	/* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */
#else
	int rc;
	uint8_t *lower;
	size_t len = u8_strlen((uint8_t *)utf8) + 1;

	/* we need a conversion to lowercase */
	if (!(lower = u8_tolower((uint8_t *)utf8, len, 0, UNINORM_NFKC, NULL, &len))) {
		/* fprintf(stderr, "u8_tolower(%s) failed (%d)\n", utf8, errno); */
		return -1;
	}

	if ((rc = idn2_lookup_u8(lower, (uint8_t **)ascii, 0)) == IDN2_OK) {
		ret = 0;
	} /* else
		fprintf(stderr, "toASCII(%s) failed (%d): %s\n", lower, rc, idn2_strerror(rc)); */

	free(lower);
#endif
#elif defined(WITH_LIBIDN)
	int rc;

	/* never hand invalid UTF-8 to libidn */
	if (!utf8_is_valid(utf8)) {
		/* fprintf(stderr, "Invalid UTF-8 sequence not converted: '%s'\n", utf8); */
		return -1;
	}

	/* idna_to_ascii_8z() automatically converts UTF-8 to lowercase */

	if ((rc = idna_to_ascii_8z(utf8, ascii, IDNA_USE_STD3_ASCII_RULES)) == IDNA_SUCCESS) {
		ret = 0;
	} /* else
		fprintf(stderr, "toASCII failed (%d): %s\n", rc, idna_strerror(rc)); */
#else
	/* no IDNA library: use the built-in punycode conversion with a fixed-size buffer */
	char lookupname[128];

	if (domain_to_punycode(utf8, lookupname, sizeof(lookupname)) == 0) {
		if (ascii)
			if ((*ascii = strdup(lookupname)))
				ret = 0;
	}
#endif

	return ret;
}
799 
/*
 * If the rule text of 'e' contains non-ASCII characters, convert it to
 * ASCII (punycode) and add the converted rule, carrying the same flags,
 * to the vector 'v'.
 *
 * Nothing is added if the conversion fails, if it yields the same text
 * or if the converted rule can't be initialized.
 */
static void add_punycode_if_needed(psl_idna_t *idna, psl_vector_t *v, psl_entry_t *e)
{
	psl_entry_t converted, *stored;
	char *ace;

	if (str_is_ascii(e->label_buf))
		return;

	if (psl_idna_toASCII(idna, e->label_buf, &ace) != 0)
		return;

	if (strcmp(e->label_buf, ace) != 0
		&& suffix_init(&converted, ace, strlen(ace)) == 0)
	{
		converted.flags = e->flags;

		stored = vector_get(v, vector_add(v, &converted));
		if (stored)
			stored->label = stored->label_buf; /* set label to changed address */
	} /* else ignore */

	free(ace);
}
822 
/* prototypes of functions that are not defined in this part of the file */

/* looks up key in the DAFSA data; this file treats -1 as "not found" and any other result as PRIV_PSL_FLAG_* bits */
int LookupStringInFixedSet(const unsigned char* graph, size_t length, const char* key, size_t key_length);
int GetUtfMode(const unsigned char *graph, size_t length);
826 
is_public_suffix(const psl_ctx_t * psl,const char * domain,int type)827 static int is_public_suffix(const psl_ctx_t *psl, const char *domain, int type)
828 {
829 	psl_entry_t suffix;
830 	const char *p;
831 	char *punycode = NULL;
832 	int need_conversion = 0;
833 
834 	/* this function should be called without leading dots, just make sure */
835 	if (*domain == '.')
836 		domain++;
837 
838 	suffix.nlabels = 1;
839 
840 	for (p = domain; *p; p++) {
841 		if (*p == '.')
842 			suffix.nlabels++;
843 		else if (*((unsigned char *)p) >= 128)
844 			need_conversion = 1; /* in case domain is non-ascii we need a toASCII conversion */
845 	}
846 
847 	if (suffix.nlabels == 1) {
848 		/* TLD, this is the prevailing '*' match. If type excludes the '*' rule, continue.
849 		 */
850 		if (!(type & PSL_TYPE_NO_STAR_RULE))
851 			return 1;
852 	}
853 
854 	type &= ~PSL_TYPE_NO_STAR_RULE;
855 
856 	if (psl->utf8 || psl == &builtin_psl)
857 		need_conversion = 0;
858 
859 	if (need_conversion) {
860 		psl_idna_t *idna = psl_idna_open();
861 
862 		if (psl_idna_toASCII(idna, domain, &punycode) == 0) {
863 			suffix.label = punycode;
864 			suffix.length = strlen(punycode);
865 		} else {
866 			/* fallback */
867 
868 			suffix.label = domain;
869 			suffix.length = p - suffix.label;
870 		}
871 
872 		psl_idna_close(idna);
873 	} else {
874 		suffix.label = domain;
875 		suffix.length = p - suffix.label;
876 	}
877 
878 	if (psl == &builtin_psl || psl->dafsa) {
879 		size_t dafsa_size = psl == &builtin_psl ? sizeof(kDafsa) : psl->dafsa_size;
880 		const unsigned char *dafsa = psl == &builtin_psl ? kDafsa : psl->dafsa;
881 		int rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
882 		if (rc != -1) {
883 			/* check for correct rule type */
884 			if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
885 				goto suffix_no;
886 			else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
887 				goto suffix_no;
888 
889 			if (rc & PRIV_PSL_FLAG_EXCEPTION)
890 				goto suffix_no;
891 
892 			/* wildcard *.foo.bar implicitly make foo.bar a public suffix */
893 			/* definitely a match, no matter if the found rule is a wildcard or not */
894 			goto suffix_yes;
895 		}
896 		if ((suffix.label = strchr(suffix.label, '.'))) {
897 			suffix.label++;
898 			suffix.length = strlen(suffix.label);
899 			suffix.nlabels--;
900 
901 			rc = LookupStringInFixedSet(dafsa, dafsa_size, suffix.label, suffix.length);
902 			if (rc != -1) {
903 				/* check for correct rule type */
904 				if (type == PSL_TYPE_ICANN && !(rc & PRIV_PSL_FLAG_ICANN))
905 					goto suffix_no;
906 				else if (type == PSL_TYPE_PRIVATE && !(rc & PRIV_PSL_FLAG_PRIVATE))
907 					goto suffix_no;
908 
909 				if (rc & PRIV_PSL_FLAG_WILDCARD)
910 					goto suffix_yes;
911 			}
912 		}
913 	} else {
914 		psl_entry_t *rule = vector_get(psl->suffixes, 0);
915 
916 		if (!rule || rule->nlabels < suffix.nlabels - 1)
917 			goto suffix_no;
918 
919 		rule = vector_get(psl->suffixes, vector_find(psl->suffixes, &suffix));
920 
921 		if (rule) {
922 			/* check for correct rule type */
923 			if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
924 				goto suffix_no;
925 			else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
926 				goto suffix_no;
927 
928 			if (rule->flags & PRIV_PSL_FLAG_EXCEPTION)
929 				goto suffix_no;
930 
931 			/* wildcard *.foo.bar implicitly make foo.bar a public suffix */
932 			/* definitely a match, no matter if the found rule is a wildcard or not */
933 			goto suffix_yes;
934 		}
935 
936 		if ((suffix.label = strchr(suffix.label, '.'))) {
937 			int pos;
938 
939 			suffix.label++;
940 			suffix.length = strlen(suffix.label);
941 			suffix.nlabels--;
942 
943 			rule = vector_get(psl->suffixes, (pos = vector_find(psl->suffixes, &suffix)));
944 
945 			if (rule) {
946 				/* check for correct rule type */
947 				if (type == PSL_TYPE_ICANN && !(rule->flags & PRIV_PSL_FLAG_ICANN))
948 					goto suffix_no;
949 				else if (type == PSL_TYPE_PRIVATE && !(rule->flags & PRIV_PSL_FLAG_PRIVATE))
950 					goto suffix_no;
951 
952 				if (rule->flags & PRIV_PSL_FLAG_WILDCARD)
953 					goto suffix_yes;
954 			}
955 		}
956 	}
957 
958 suffix_no:
959 	if (punycode)
960 		free(punycode);
961 	return 0;
962 
963 suffix_yes:
964 	if (punycode)
965 		free(punycode);
966 	return 1;
967 }
968 
969 /**
970  * psl_is_public_suffix:
971  * @psl: PSL context
972  * @domain: Domain string
973  *
974  * This function checks if @domain is a public suffix by the means of the
975  * [Mozilla Public Suffix List](https://publicsuffix.org).
976  *
977  * For cookie domain checking see psl_is_cookie_domain_acceptable().
978  *
979  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
980  * Other encodings likely result in incorrect return values.
 * Use helper function psl_str_to_utf8lower() for normalization of @domain.
982  *
983  * @psl is a context returned by either psl_load_file(), psl_load_fp() or
984  * psl_builtin().
985  *
986  * Returns: 1 if domain is a public suffix, 0 if not.
987  *
988  * Since: 0.1
989  */
psl_is_public_suffix(const psl_ctx_t * psl,const char * domain)990 int psl_is_public_suffix(const psl_ctx_t *psl, const char *domain)
991 {
992 	if (!psl || !domain)
993 		return 1;
994 
995 	return is_public_suffix(psl, domain, PSL_TYPE_ANY);
996 }
997 
998 /**
999  * psl_is_public_suffix2:
1000  * @psl: PSL context
1001  * @domain: Domain string
1002  * @type: Domain type
1003  *
1004  * This function checks if @domain is a public suffix by the means of the
1005  * [Mozilla Public Suffix List](https://publicsuffix.org).
1006  *
1007  * @type specifies the PSL section where to perform the lookup. Valid values are
1008  * %PSL_TYPE_PRIVATE, %PSL_TYPE_ICANN, %PSL_TYPE_NO_STAR_RULE, and %PSL_TYPE_ANY.
1009  *
1010  * %PSL_TYPE_NO_STAR_RULE switches of the 'prevailing star rule' (see
1011  * [List](https://publicsuffix.org/list) under 'Algorithm' 2.).
1012  * Applying the flag means that TLDs not explicitly listed in the PSL are *not* treated as public suffixes.
1013  *
1014  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1015  * Other encodings likely result in incorrect return values.
1016  * Use helper function psl_str_to_utf8lower() for normalization @domain.
1017  *
1018  * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1019  * psl_builtin().
1020  *
1021  * Returns: 1 if domain is a public suffix, 0 if not.
1022  *
1023  * Since: 0.1
1024  */
psl_is_public_suffix2(const psl_ctx_t * psl,const char * domain,int type)1025 int psl_is_public_suffix2(const psl_ctx_t *psl, const char *domain, int type)
1026 {
1027 	if (!psl || !domain)
1028 		return 1;
1029 
1030 	return is_public_suffix(psl, domain, type);
1031 }
1032 
1033 /**
1034  * psl_unregistrable_domain:
1035  * @psl: PSL context
1036  * @domain: Domain string
1037  *
1038  * This function finds the longest public suffix part of @domain by the means
1039  * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1040  *
1041  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1042  * Other encodings likely result in incorrect return values.
1043  * Use helper function psl_str_to_utf8lower() for normalization @domain.
1044  *
1045  * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1046  * psl_builtin().
1047  *
1048  * Returns: Pointer to longest public suffix part of @domain or %NULL if @domain
1049  * does not contain a public suffix (or if @psl is %NULL).
1050  *
1051  * Since: 0.1
1052  */
psl_unregistrable_domain(const psl_ctx_t * psl,const char * domain)1053 const char *psl_unregistrable_domain(const psl_ctx_t *psl, const char *domain)
1054 {
1055 	int nlabels = 0;
1056 	const char *p;
1057 
1058 	if (!psl || !domain)
1059 		return NULL;
1060 
1061 	/*
1062 	 * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1063 	 * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1064 	 */
1065 	for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1066 		if (*p == '.' && ++nlabels > 8) {
1067 			domain = p + 1;
1068 			break;
1069 		}
1070 	}
1071 
1072 	/*
1073 	 *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1074 	 *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1075 	 */
1076 
1077 	while (!is_public_suffix(psl, domain, 0)) {
1078 		if ((domain = strchr(domain, '.')))
1079 			domain++;
1080 		else
1081 			break; /* prevent endless loop if is_public_suffix() is broken. */
1082 	}
1083 
1084 	return domain;
1085 }
1086 
1087 /**
1088  * psl_registrable_domain:
1089  * @psl: PSL context
1090  * @domain: Domain string
1091  *
1092  * This function finds the shortest private suffix part of @domain by the means
1093  * of the [Mozilla Public Suffix List](https://publicsuffix.org).
1094  *
1095  * International @domain names have to be either in UTF-8 (lowercase + NFKC) or in ASCII/ACE format (punycode).
1096  * Other encodings likely result in incorrect return values.
1097  * Use helper function psl_str_to_utf8lower() for normalization @domain.
1098  *
1099  * @psl is a context returned by either psl_load_file(), psl_load_fp() or
1100  * psl_builtin().
1101  *
1102  * Returns: Pointer to shortest private suffix part of @domain or %NULL if @domain
1103  * does not contain a private suffix (or if @psl is %NULL).
1104  *
1105  * Since: 0.1
1106  */
psl_registrable_domain(const psl_ctx_t * psl,const char * domain)1107 const char *psl_registrable_domain(const psl_ctx_t *psl, const char *domain)
1108 {
1109 	const char *p, *regdom = NULL;
1110 	int nlabels = 0;
1111 
1112 	if (!psl || !domain || *domain == '.')
1113 		return NULL;
1114 
1115 	/*
1116 	 * In the main loop we introduce a O(N^2) behavior to avoid code duplication.
1117 	 * To avoid nasty CPU hogging, we limit the lookup to max. 8 domain labels to the right.
1118 	 */
1119 	for (p = domain + strlen(domain) - 1; p >= domain; p--) {
1120 		if (*p == '.' && ++nlabels > 8) {
1121 			domain = p + 1;
1122 			break;
1123 		}
1124 	}
1125 
1126 	/*
1127 	 *  We check from left to right to catch special PSL entries like 'forgot.his.name':
1128 	 *   'forgot.his.name' and 'name' are in the PSL while 'his.name' is not.
1129 	 */
1130 
1131 	while (!is_public_suffix(psl, domain, 0)) {
1132 		if ((p = strchr(domain, '.'))) {
1133 			regdom = domain;
1134 			domain = p + 1;
1135 		} else
1136 			break; /* prevent endless loop if is_public_suffix() is broken. */
1137 	}
1138 
1139 	return regdom;
1140 }
1141 
1142 /**
1143  * psl_load_file:
1144  * @fname: Name of PSL file
1145  *
1146  * This function loads the public suffixes file named @fname.
1147  * To free the allocated resources, call psl_free().
1148  *
1149  * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1150  *
1151  * Returns: Pointer to a PSL context or %NULL on failure.
1152  *
1153  * Since: 0.1
1154  */
psl_load_file(const char * fname)1155 psl_ctx_t *psl_load_file(const char *fname)
1156 {
1157 	FILE *fp;
1158 	psl_ctx_t *psl = NULL;
1159 
1160 	if (!fname)
1161 		return NULL;
1162 
1163 	if ((fp = fopen(fname, "rb"))) {
1164 		psl = psl_load_fp(fp);
1165 		fclose(fp);
1166 	}
1167 
1168 	return psl;
1169 }
1170 
1171 /**
1172  * psl_load_fp:
1173  * @fp: %FILE pointer
1174  *
1175  * This function loads the public suffixes from a %FILE pointer.
1176  * To free the allocated resources, call psl_free().
1177  *
1178  * The suffixes are expected to be UTF-8 encoded (lowercase + NFKC) if they are international.
1179  *
1180  * Returns: Pointer to a PSL context or %NULL on failure.
1181  *
1182  * Since: 0.1
1183  */
psl_load_fp(FILE * fp)1184 psl_ctx_t *psl_load_fp(FILE *fp)
1185 {
1186 	psl_ctx_t *psl;
1187 	psl_entry_t suffix, *suffixp;
1188 	char buf[256], *linep, *p;
1189 	int type = 0, is_dafsa;
1190 	psl_idna_t *idna;
1191 
1192 	if (!fp)
1193 		return NULL;
1194 
1195 	if (!(psl = calloc(1, sizeof(psl_ctx_t))))
1196 		return NULL;
1197 
1198 	/* read first line to allow ASCII / DAFSA detection */
1199 	if (!(linep = fgets(buf, sizeof(buf) - 1, fp)))
1200 		goto fail;
1201 
1202 	is_dafsa = strlen(buf) == 16 && !strncmp(buf, ".DAFSA@PSL_", 11);
1203 
1204 	if (is_dafsa) {
1205 		void *m;
1206 		size_t size = 65536, n, len = 0;
1207 		int version = atoi(buf + 11);
1208 
1209 		if (version != 0)
1210 			goto fail;
1211 
1212 		if (!(psl->dafsa = malloc(size)))
1213 			goto fail;
1214 
1215 		memcpy(psl->dafsa, buf, len);
1216 
1217 		while ((n = fread(psl->dafsa + len, 1, size - len, fp)) > 0) {
1218 			len += n;
1219 			if (len >= size) {
1220 				if (!(m = realloc(psl->dafsa, size *= 2)))
1221 					goto fail;
1222 				psl->dafsa = m;
1223 			}
1224 		}
1225 
1226 		/* release unused memory */
1227 		if ((m = realloc(psl->dafsa, len)))
1228 			psl->dafsa = m;
1229 		else if (!len)
1230 			psl->dafsa = NULL; /* realloc() just free'd psl->dafsa */
1231 
1232 		psl->dafsa_size = len;
1233 		psl->utf8 = !!GetUtfMode(psl->dafsa, len);
1234 
1235 		return psl;
1236 	}
1237 
1238 	idna = psl_idna_open();
1239 
1240 	/*
1241 	 *  as of 02.11.2012, the list at https://publicsuffix.org/list/ contains ~6000 rules and 40 exceptions.
1242 	 *  as of 19.02.2014, the list at https://publicsuffix.org/list/ contains ~6500 rules and 19 exceptions.
1243 	 *  as of 07.10.2018, the list at https://publicsuffix.org/list/ contains ~8600 rules and 8 exceptions.
1244 	 */
1245 	psl->suffixes = vector_alloc(8*1024, suffix_compare_array);
1246 	psl->utf8 = 1; /* we put UTF-8 and punycode rules in the lookup vector */
1247 
1248 	do {
1249 		while (isspace_ascii(*linep)) linep++; /* ignore leading whitespace */
1250 		if (!*linep) continue; /* skip empty lines */
1251 
1252 		if (*linep == '/' && linep[1] == '/') {
1253 			if (!type) {
1254 				if (strstr(linep + 2, "===BEGIN ICANN DOMAINS==="))
1255 					type = PRIV_PSL_FLAG_ICANN;
1256 				else if (!type && strstr(linep + 2, "===BEGIN PRIVATE DOMAINS==="))
1257 					type = PRIV_PSL_FLAG_PRIVATE;
1258 			}
1259 			else if (type == PRIV_PSL_FLAG_ICANN && strstr(linep + 2, "===END ICANN DOMAINS==="))
1260 				type = 0;
1261 			else if (type == PRIV_PSL_FLAG_PRIVATE && strstr(linep + 2, "===END PRIVATE DOMAINS==="))
1262 				type = 0;
1263 
1264 			continue; /* skip comments */
1265 		}
1266 
1267 		/* parse suffix rule */
1268 		for (p = linep; *linep && !isspace_ascii(*linep);) linep++;
1269 		*linep = 0;
1270 
1271 		if (*p == '!') {
1272 			p++;
1273 			suffix.flags = PRIV_PSL_FLAG_EXCEPTION | type;
1274 			psl->nexceptions++;
1275 		} else if (*p == '*') {
1276 			if (*++p != '.') {
1277 				/* fprintf(stderr, "Unsupported kind of rule (ignored): %s\n", p - 1); */
1278 				continue;
1279 			}
1280 			p++;
1281 			/* wildcard *.foo.bar implicitly make foo.bar a public suffix */
1282 			suffix.flags = PRIV_PSL_FLAG_WILDCARD | PRIV_PSL_FLAG_PLAIN | type;
1283 			psl->nwildcards++;
1284 			psl->nsuffixes++;
1285 		} else {
1286 			suffix.flags = PRIV_PSL_FLAG_PLAIN | type;
1287 			psl->nsuffixes++;
1288 		}
1289 
1290 		if (suffix_init(&suffix, p, linep - p) == 0) {
1291 			int index;
1292 
1293 			if ((index = vector_find(psl->suffixes, &suffix)) >= 0) {
1294 				/* Found existing entry:
1295 				 * Combination of exception and plain rule is ambiguous
1296 				 * !foo.bar
1297 				 * foo.bar
1298 				 *
1299 				 * Allowed:
1300 				 * !foo.bar + *.foo.bar
1301 				 * foo.bar + *.foo.bar
1302 				 *
1303 				 * We do not check here, let's do it later.
1304 				 */
1305 
1306 				suffixp = vector_get(psl->suffixes, index);
1307 				suffixp->flags |= suffix.flags;
1308 			} else {
1309 				/* New entry */
1310 				suffixp = vector_get(psl->suffixes, vector_add(psl->suffixes, &suffix));
1311 			}
1312 
1313 			if (suffixp) {
1314 				suffixp->label = suffixp->label_buf; /* set label to changed address */
1315 				add_punycode_if_needed(idna, psl->suffixes, suffixp);
1316 			}
1317 		}
1318 	} while ((linep = fgets(buf, sizeof(buf), fp)));
1319 
1320 	vector_sort(psl->suffixes);
1321 
1322 	psl_idna_close(idna);
1323 
1324 	return psl;
1325 
1326 fail:
1327 	psl_free(psl);
1328 	return NULL;
1329 }
1330 
1331 /**
1332  * psl_free:
1333  * @psl: PSL context pointer
1334  *
 * This function frees the PSL context that has been retrieved via
1336  * psl_load_fp() or psl_load_file().
1337  *
1338  * Since: 0.1
1339  */
void psl_free(psl_ctx_t *psl)
{
	/* nothing to release for NULL and for the built-in context */
	if (!psl || psl == &builtin_psl)
		return;

	vector_free(&psl->suffixes);
	free(psl->dafsa);
	free(psl);
}
1348 
1349 /**
1350  * psl_builtin:
1351  *
1352  * This function returns the PSL context that has been generated and built in at compile-time.
1353  * You don't have to free the returned context explicitly.
1354  *
1355  * The builtin data also contains punycode entries, one for each international domain name.
1356  *
1357  * If the generation of built-in data has been disabled during compilation, %NULL will be returned.
1358  * When using the builtin psl context, you can provide UTF-8 (lowercase + NFKC) or ASCII/ACE (punycode)
1359  * representations of domains to functions like psl_is_public_suffix().
1360  *
1361  * Returns: Pointer to the built in PSL data or %NULL if this data is not available.
1362  *
1363  * Since: 0.1
1364  */
psl_builtin(void)1365 const psl_ctx_t *psl_builtin(void)
1366 {
1367 #if defined(BUILTIN_GENERATOR_LIBICU) || defined(BUILTIN_GENERATOR_LIBIDN2) || defined(BUILTIN_GENERATOR_LIBIDN)
1368 	return &builtin_psl;
1369 #else
1370 	return NULL;
1371 #endif
1372 }
1373 
1374 /**
1375  * psl_suffix_count:
1376  * @psl: PSL context pointer
1377  *
1378  * This function returns number of public suffixes maintained by @psl.
1379  * The number of exceptions within the Public Suffix List are not included.
1380  *
1381  * If the information is not available, the return value is -1 (since 0.19).
1382  * This is the case with DAFSA blobs or if @psl is %NULL.
1383  *
1384  * Returns: Number of public suffixes entries in PSL context or -1 if this information is not available.
1385  *
1386  * Since: 0.1
1387  */
psl_suffix_count(const psl_ctx_t * psl)1388 int psl_suffix_count(const psl_ctx_t *psl)
1389 {
1390 	if (psl == &builtin_psl)
1391 		return _psl_nsuffixes;
1392 	else if (psl)
1393 		return psl->dafsa ? -1 : psl->nsuffixes;
1394 	else
1395 		return -1;
1396 }
1397 
1398 /**
1399  * psl_suffix_exception_count:
1400  * @psl: PSL context pointer
1401  *
1402  * This function returns number of public suffix exceptions maintained by @psl.
1403  *
1404  * If the information is not available, the return value is -1 (since 0.19).
1405  * This is the case with DAFSA blobs or if @psl is %NULL.
1406  *
1407  * Returns: Number of public suffix exceptions in PSL context or -1 if this information is not available.
1408  *
1409  * Since: 0.1
1410  */
psl_suffix_exception_count(const psl_ctx_t * psl)1411 int psl_suffix_exception_count(const psl_ctx_t *psl)
1412 {
1413 	if (psl == &builtin_psl)
1414 		return _psl_nexceptions;
1415 	else if (psl)
1416 		return psl->dafsa ? -1 : psl->nexceptions;
1417 	else
1418 		return -1;
1419 }
1420 
1421 /**
1422  * psl_suffix_wildcard_count:
1423  * @psl: PSL context pointer
1424  *
1425  * This function returns number of public suffix wildcards maintained by @psl.
1426  *
1427  * If the information is not available, the return value is -1 (since 0.19).
1428  * This is the case with DAFSA blobs or if @psl is %NULL.
1429  *
1430  * Returns: Number of public suffix wildcards in PSL context or -1 if this information is not available.
1431  *
1432  * Since: 0.10.0
1433  */
psl_suffix_wildcard_count(const psl_ctx_t * psl)1434 int psl_suffix_wildcard_count(const psl_ctx_t *psl)
1435 {
1436 	if (psl == &builtin_psl)
1437 		return _psl_nwildcards;
1438 	else if (psl)
1439 		return psl->dafsa ? -1 : psl->nwildcards;
1440 	else
1441 		return -1;
1442 }
1443 
1444 /**
1445  * psl_builtin_file_time:
1446  *
1447  * This function returns the mtime of the Public Suffix List file that has been built in.
1448  *
1449  * If the generation of built-in data has been disabled during compilation, 0 will be returned.
1450  *
1451  * Returns: time_t value or 0.
1452  *
1453  * Since: 0.1
1454  */
psl_builtin_file_time(void)1455 time_t psl_builtin_file_time(void)
1456 {
1457 	return _psl_file_time;
1458 }
1459 
1460 /**
1461  * psl_builtin_sha1sum:
1462  *
1463  * This function returns the SHA1 checksum of the Public Suffix List file that has been built in.
1464  * The returned string is in lowercase hex encoding, e.g. "2af1e9e3044eda0678bb05949d7cca2f769901d8".
1465  *
1466  * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1467  *
1468  * Returns: String containing SHA1 checksum or an empty string.
1469  *
1470  * Since: 0.1
1471  */
psl_builtin_sha1sum(void)1472 const char *psl_builtin_sha1sum(void)
1473 {
1474 	return _psl_sha1_checksum;
1475 }
1476 
1477 /**
1478  * psl_builtin_filename:
1479  *
1480  * This function returns the file name of the Public Suffix List file that has been built in.
1481  *
1482  * If the generation of built-in data has been disabled during compilation, an empty string will be returned.
1483  *
1484  * Returns: String containing the PSL file name or an empty string.
1485  *
1486  * Since: 0.1
1487  */
psl_builtin_filename(void)1488 const char *psl_builtin_filename(void)
1489 {
1490 	return _psl_filename;
1491 }
1492 
1493 /**
1494  * psl_builtin_outdated:
1495  *
1496  * This function checks if the built-in data is older than the file it has been created from.
1497  * If it is, it might be a good idea for the application to reload the PSL.
1498  * The mtime is taken as reference.
1499  *
1500  * If the PSL file does not exist, it is assumed that the built-in data is not outdated.
1501  *
1502  * Returns: 1 if the built-in is outdated, 0 otherwise.
1503  *
1504  * Since: 0.10.0
1505  */
psl_builtin_outdated(void)1506 int psl_builtin_outdated(void)
1507 {
1508 	struct stat st;
1509 
1510 	if (stat(_psl_filename, &st) == 0 && st.st_mtime > _psl_file_time)
1511 		return 1;
1512 
1513 	return 0;
1514 }
1515 
1516 /**
1517  * psl_dist_filename:
1518  *
1519  * This function returns the file name of the distribution/system PSL data file.
1520  * This file will be considered by psl_latest().
1521  *
1522  * Return the filename that is set by ./configure --with-psl-distfile, or an empty string.
1523  *
1524  * Returns: String containing a PSL file name or an empty string.
1525  *
1526  * Since: 0.16
1527  */
psl_dist_filename(void)1528 const char *psl_dist_filename(void)
1529 {
1530 	return _psl_dist_filename;
1531 }
1532 
1533 /**
1534  * psl_get_version:
1535  *
1536  * Get libpsl version.
1537  *
1538  * Returns: String containing version of libpsl.
1539  *
1540  * Since: 0.2.5
1541  **/
psl_get_version(void)1542 const char *psl_get_version(void)
1543 {
1544 #ifdef WITH_LIBICU
1545 	return PACKAGE_VERSION " (+libicu/" U_ICU_VERSION ")";
1546 #elif defined(WITH_LIBIDN2)
1547 	return PACKAGE_VERSION " (+libidn2/" IDN2_VERSION ")";
1548 #elif defined(WITH_LIBIDN)
1549 	return PACKAGE_VERSION " (+libidn/" STRINGPREP_VERSION ")";
1550 #else
1551 	return PACKAGE_VERSION " (no IDNA support)";
1552 #endif
1553 }
1554 
1555 /**
1556  * psl_check_version_number:
1557  * @version: Version number (hex) to check against.
1558  *
1559  * Check the given version number is at minimum the current library version number.
1560  * The version number must be a hexadecimal number like 0x000a01 (V0.10.1).
1561  *
1562  * Returns: Returns the library version number if the given version number is at least
1563  * the version of the library, else return 0; If the argument is 0, the function returns
1564  * the library version number without performing a check.
1565  *
1566  * Since: 0.11.0
1567  **/
psl_check_version_number(int version)1568 int psl_check_version_number(int version)
1569 {
1570 	if (version) {
1571 		int major = version >> 16;
1572 		int minor = (version >> 8) & 0xFF;
1573 		int patch = version & 0xFF;
1574 
1575 		if (major < PSL_VERSION_MAJOR
1576 			|| (major == PSL_VERSION_MAJOR && minor < PSL_VERSION_MINOR)
1577 			|| (major == PSL_VERSION_MAJOR && minor == PSL_VERSION_MINOR && patch < PSL_VERSION_PATCH))
1578 		{
1579 			return 0;
1580 		}
1581 	}
1582 
1583 	return PSL_VERSION_NUMBER;
1584 }
1585 
/*
 * Return whether hostname is a textual IPv4 or IPv6 address or not.
 * Returns 1 if it is, 0 if it is not (or if it could not be parsed).
 */
static int isip(const char *hostname)
{
#ifdef _WIN32
	WCHAR wName[INET6_ADDRSTRLEN+1];

	struct sockaddr_in  addr  = {0};
	struct sockaddr_in6 addr6 = {0};

	INT size  = sizeof(addr);
	INT size6 = sizeof(addr6);

	/* WSAStringToAddressW() takes a wide string; a failed conversion means "no IP address" */
	if (!MultiByteToWideChar(CP_UTF8, 0, hostname, -1, wName, countof(wName)))
		return 0;

	return (WSAStringToAddressW(wName, AF_INET,  NULL, (struct sockaddr *)&addr,  &size) != SOCKET_ERROR) |
	       (WSAStringToAddressW(wName, AF_INET6, NULL, (struct sockaddr *)&addr6, &size6) != SOCKET_ERROR);
#else
	struct in_addr addr;
	struct in6_addr addr6;

	/*
	 * inet_pton() returns 1 on success, 0 if the string is not a valid address
	 * and -1 if the address family is not supported. Only 1 means "IP address",
	 * so a plain truth test (which takes -1 as true) is not sufficient.
	 */
	return inet_pton(AF_INET, hostname, &addr) == 1 || inet_pton(AF_INET6, hostname, &addr6) == 1;
#endif
}
1610 
1611 /**
1612  * psl_is_cookie_domain_acceptable:
1613  * @psl: PSL context pointer
1614  * @hostname: The request hostname.
1615  * @cookie_domain: The domain value from a cookie
1616  *
1617  * This helper function checks whether @cookie_domain is an acceptable cookie domain value for the request
1618  * @hostname.
1619  *
1620  * For international domain names both, @hostname and @cookie_domain, have to be either in UTF-8 (lowercase + NFKC)
1621  * or in ASCII/ACE (punycode) format. Other encodings or mixing UTF-8 and punycode likely result in incorrect return values.
1622  *
1623  * Use helper function psl_str_to_utf8lower() for normalization of @hostname and @cookie_domain.
1624  *
1625  * Examples:
1626  * 1. Cookie domain 'example.com' would be acceptable for hostname 'www.example.com',
1627  * but '.com' or 'com' would NOT be acceptable since 'com' is a public suffix.
1628  *
1629  * 2. Cookie domain 'his.name' would be acceptable for hostname 'remember.his.name',
1630  *  but NOT for 'forgot.his.name' since 'forgot.his.name' is a public suffix.
1631  *
1632  * Returns: 1 if acceptable, 0 if not acceptable.
1633  *
1634  * Since: 0.1
1635  */
int psl_is_cookie_domain_acceptable(const psl_ctx_t *psl, const char *hostname, const char *cookie_domain)
{
	const char *tail, *public_suffix;
	size_t host_len, cookie_len;

	if (!psl || !hostname || !cookie_domain)
		return 0;

	/* leading dots of the cookie domain are skipped */
	for (; *cookie_domain == '.'; cookie_domain++)
		;

	if (strcmp(hostname, cookie_domain) == 0)
		return 1; /* an exact match is acceptable (and pretty common) */

	if (isip(hostname))
		return 0; /* Hostname is an IP address and these must match fully (RFC 6265, 5.1.3) */

	cookie_len = strlen(cookie_domain);
	host_len = strlen(hostname);

	if (cookie_len >= host_len)
		return 0; /* cookie_domain is too long */

	/* cookie_domain has to be the end of hostname, starting right after a dot */
	tail = hostname + (host_len - cookie_len);
	if (tail[-1] != '.' || strcmp(tail, cookie_domain) != 0)
		return 0;

	/* OK, cookie_domain matches, but it must be longer than the longest public suffix in 'hostname' */
	public_suffix = psl_unregistrable_domain(psl, hostname);
	if (!public_suffix)
		return 1;

	return cookie_len > strlen(public_suffix) ? 1 : 0;
}
1672 
1673 /**
1674  * psl_free_string:
1675  * @str: pointer to lowercase string returned by psl_str_to_utf8lower()
1676  *
1677  * This function free()'s the memory allocated by psl_str_to_utf8lower() when
1678  * returning a lowercase string
1679  *
1680  * Since: 0.19
1681  */
void psl_free_string(char *str)
{
	/* free() accepts NULL, no extra check needed */
	free(str);
}
1687 
1688 /**
1689  * psl_str_to_utf8lower:
1690  * @str: string to convert
1691  * @encoding: charset encoding of @str, e.g. 'iso-8859-1' or %NULL
1692  * @locale: locale of @str for to lowercase conversion, e.g. 'de' or %NULL
1693  * @lower: return value containing the converted string
1694  *
1695  * This helper function converts a string to UTF-8 lowercase + NFKC representation.
1696  * Lowercase + NFKC UTF-8 is needed as input to the domain checking functions.
1697  *
1698  * @lower stays unchanged on error.
1699  *
1700  * When returning PSL_SUCCESS, the return value 'lower' must be freed after usage.
1701  *
1702  * Returns: psl_error_t value.
1703  *   PSL_SUCCESS: Success
1704  *   PSL_ERR_INVALID_ARG: @str is a %NULL value.
1705  *   PSL_ERR_CONVERTER: Failed to open the unicode converter with name @encoding
1706  *   PSL_ERR_TO_UTF16: Failed to convert @str to unicode
1707  *   PSL_ERR_TO_LOWER: Failed to convert unicode to lowercase
1708  *   PSL_ERR_TO_UTF8: Failed to convert unicode to UTF-8
1709  *   PSL_ERR_NO_MEM: Failed to allocate memory
1710  *
1711  * Since: 0.4
1712  */
psl_str_to_utf8lower(const char * str,const char * encoding PSL_UNUSED,const char * locale PSL_UNUSED,char ** lower)1713 psl_error_t psl_str_to_utf8lower(const char *str, const char *encoding PSL_UNUSED, const char *locale PSL_UNUSED, char **lower)
1714 {
1715 	int ret = PSL_ERR_INVALID_ARG;
1716 
1717 	if (!str)
1718 		return PSL_ERR_INVALID_ARG;
1719 
1720 	/* shortcut to avoid costly conversion */
1721 	if (str_is_ascii(str)) {
1722 		if (lower) {
1723 			char *p, *tmp;
1724 
1725 			if (!(tmp = strdup(str)))
1726 				return PSL_ERR_NO_MEM;
1727 
1728 			*lower = tmp;
1729 
1730 			/* convert ASCII string to lowercase */
1731 			for (p = *lower; *p; p++)
1732 				if (isupper(*p))
1733 					*p = tolower(*p);
1734 		}
1735 		return PSL_SUCCESS;
1736 	}
1737 
1738 #ifdef WITH_LIBICU
1739 	do {
1740 	size_t str_length = strlen(str);
1741 	UErrorCode status = 0;
1742 	UChar *utf16_dst, *utf16_lower;
1743 	int32_t utf16_dst_length;
1744 	char *utf8_lower;
1745 	UConverter *uconv;
1746 
1747 	if (str_length < 256) {
1748 		/* C89 allocation */
1749 		utf16_dst   = alloca(sizeof(UChar) * (str_length * 2 + 1));
1750 		utf16_lower = alloca(sizeof(UChar) * (str_length * 2 + 1));
1751 		utf8_lower  = alloca(str_length * 6 + 1);
1752 	} else {
1753 		utf16_dst   = malloc(sizeof(UChar) * (str_length * 2 + 1));
1754 		utf16_lower = malloc(sizeof(UChar) * (str_length * 2 + 1));
1755 		utf8_lower  = malloc(str_length * 6 + 1);
1756 
1757 		if (!utf16_dst || !utf16_lower || !utf8_lower) {
1758 			ret = PSL_ERR_NO_MEM;
1759 			goto out;
1760 		}
1761 	}
1762 
1763 	uconv = ucnv_open(encoding, &status);
1764 	if (U_SUCCESS(status)) {
1765 		utf16_dst_length = ucnv_toUChars(uconv, utf16_dst, str_length * 2 + 1, str, str_length, &status);
1766 		ucnv_close(uconv);
1767 
1768 		if (U_SUCCESS(status)) {
1769 			int32_t utf16_lower_length = u_strToLower(utf16_lower, str_length * 2 + 1, utf16_dst, utf16_dst_length, locale, &status);
1770 			if (U_SUCCESS(status)) {
1771 				u_strToUTF8(utf8_lower, str_length * 6 + 1, NULL, utf16_lower, utf16_lower_length, &status);
1772 				if (U_SUCCESS(status)) {
1773 					ret = PSL_SUCCESS;
1774 					if (lower) {
1775 						char *tmp = strdup(utf8_lower);
1776 
1777 						if (tmp)
1778 							*lower = tmp;
1779 						else
1780 							ret = PSL_ERR_NO_MEM;
1781 					}
1782 				} else {
1783 					ret = PSL_ERR_TO_UTF8;
1784 					/* fprintf(stderr, "Failed to convert UTF-16 to UTF-8 (status %d)\n", status); */
1785 				}
1786 			} else {
1787 				ret = PSL_ERR_TO_LOWER;
1788 				/* fprintf(stderr, "Failed to convert UTF-16 to lowercase (status %d)\n", status); */
1789 			}
1790 		} else {
1791 			ret = PSL_ERR_TO_UTF16;
1792 			/* fprintf(stderr, "Failed to convert string to UTF-16 (status %d)\n", status); */
1793 		}
1794 	} else {
1795 		ret = PSL_ERR_CONVERTER;
1796 		/* fprintf(stderr, "Failed to open converter for '%s' (status %d)\n", encoding, status); */
1797 	}
1798 out:
1799 	if (str_length >= 256) {
1800 		free(utf16_dst);
1801 		free(utf16_lower);
1802 		free(utf8_lower);
1803 	}
1804 	} while (0);
1805 #elif defined(WITH_LIBIDN2) || defined(WITH_LIBIDN)
1806 	do {
1807 		/* find out local charset encoding */
1808 		if (!encoding) {
1809 #ifdef HAVE_NL_LANGINFO
1810 			encoding = nl_langinfo(CODESET);
1811 #elif defined _WIN32
1812 			static char buf[16];
1813 			snprintf(buf, sizeof(buf), "CP%u", GetACP());
1814 			encoding = buf;
1815 #endif
1816 			if (!encoding || !*encoding)
1817 				encoding = "ASCII";
1818 		}
1819 
1820 		/* convert to UTF-8 */
1821 		if (strcasecmp(encoding, "utf-8")) {
1822 			iconv_t cd = iconv_open("utf-8", encoding);
1823 
1824 			if (cd != (iconv_t)-1) {
1825 				char *tmp = (char *)str; /* iconv won't change where str points to, but changes tmp itself */
1826 				size_t tmp_len = strlen(str) + 1;
1827 				size_t dst_len = tmp_len * 6, dst_len_tmp = dst_len;
1828 				char *dst = malloc(dst_len + 1), *dst_tmp = dst;
1829 
1830 				if (!dst) {
1831 					ret = PSL_ERR_NO_MEM;
1832 				}
1833 				else if (iconv(cd, (WINICONV_CONST char **)&tmp, &tmp_len, &dst_tmp, &dst_len_tmp) != (size_t)-1
1834 					&& iconv(cd, NULL, NULL, &dst_tmp, &dst_len_tmp) != (size_t)-1)
1835 				{
1836 					/* start size for u8_tolower internal memory allocation.
1837 					 * u8_tolower() does not terminate the result string. we have 0 byte included in above tmp_len
1838 					 * and thus in len. */
1839 					size_t len = dst_len - dst_len_tmp;
1840 
1841 					if ((tmp = (char *)u8_tolower((uint8_t *)dst, len, 0, UNINORM_NFKC, NULL, &len))) {
1842 						ret = PSL_SUCCESS;
1843 						if (lower) {
1844 							*lower = tmp;
1845 							tmp = NULL;
1846 						} else
1847 							free(tmp);
1848 					} else {
1849 						ret = PSL_ERR_TO_LOWER;
1850 						/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1851 					}
1852 				} else {
1853 					ret = PSL_ERR_TO_UTF8;
1854 					/* fprintf(stderr, "Failed to convert '%s' string into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1855 				}
1856 
1857 				free(dst);
1858 				iconv_close(cd);
1859 			} else {
1860 				ret = PSL_ERR_TO_UTF8;
1861 				/* fprintf(stderr, "Failed to prepare encoding '%s' into '%s' (%d)\n", src_encoding, dst_encoding, errno); */
1862 			}
1863 		} else {
1864 			/* we need a conversion to lowercase */
1865 			uint8_t *tmp;
1866 
1867 			/* start size for u8_tolower internal memory allocation.
1868 			 * u8_tolower() does not terminate the result string, so include terminating 0 byte in len. */
1869 			size_t len = u8_strlen((uint8_t *)str) + 1;
1870 
1871 			if ((tmp = u8_tolower((uint8_t *)str, len, 0, UNINORM_NFKC, NULL, &len))) {
1872 				ret = PSL_SUCCESS;
1873 				if (lower) {
1874 					*lower = (char*)tmp;
1875 					tmp = NULL;
1876 				} else
1877 					free(tmp);
1878 			} else {
1879 				ret = PSL_ERR_TO_LOWER;
1880 				/* fprintf(stderr, "Failed to convert UTF-8 to lowercase (errno %d)\n", errno); */
1881 			}
1882 		}
1883 
1884 	} while (0);
1885 #endif
1886 
1887 	return ret;
1888 }
1889 
1890 /* if file is newer than the builtin data, insert it reverse sorted by mtime */
insert_file(const char * fname,const char ** psl_fname,time_t * psl_mtime,int n)1891 static int insert_file(const char *fname, const char **psl_fname, time_t *psl_mtime, int n)
1892 {
1893 	struct stat st;
1894 	int it;
1895 
1896 	if (fname && *fname && stat(fname, &st) == 0 && st.st_mtime > _psl_file_time) {
1897 		/* add file name and mtime to end of array */
1898 		psl_fname[n] = fname;
1899 		psl_mtime[n++] = st.st_mtime;
1900 
1901 		/* move the new entry to it's correct position */
1902 		for (it = n - 2; it >= 0 && st.st_mtime > psl_mtime[it]; it--) {
1903 			psl_fname[it + 1] = psl_fname[it];
1904 			psl_mtime[it + 1] = psl_mtime[it];
1905 			psl_fname[it] = fname;
1906 			psl_mtime[it] = st.st_mtime;
1907 		}
1908 	}
1909 
1910 	return n;
1911 }
1912 
1913 /**
1914  * psl_latest:
1915  * @fname: Name of PSL file or %NULL
1916  *
 * This function loads the latest available PSL data from either
1918  * - @fname (application specific filename, may be %NULL)
1919  * - location specified during built-time (filename from ./configure --with-psl-distfile)
1920  * - built-in PSL data (generated from ./configure --with-psl-file)
1921  * - location of built-in data (filename from ./configure --with-psl-file)
1922  *
1923  * If none of the above is available, the function returns %NULL.
1924  *
1925  * To free the allocated resources, call psl_free().
1926  *
1927  * Returns: Pointer to a PSL context or %NULL on failure.
1928  *
1929  * Since: 0.16
1930  */
psl_latest(const char * fname)1931 psl_ctx_t *psl_latest(const char *fname)
1932 {
1933 	psl_ctx_t *psl;
1934 	const char *psl_fname[3];
1935 	time_t psl_mtime[3];
1936 	int it, ntimes;
1937 
1938 	psl_fname[0] = NULL; /* silence gcc 6.2 false warning */
1939 
1940 	/* create array of PSL files reverse sorted by mtime (latest first) */
1941 	ntimes = insert_file(fname, psl_fname, psl_mtime, 0);
1942 	ntimes = insert_file(_psl_dist_filename, psl_fname, psl_mtime, ntimes);
1943 	ntimes = insert_file(_psl_filename, psl_fname, psl_mtime, ntimes);
1944 
1945 	/* load PSL data from the latest file, falling back to the second recent, ... */
1946 	for (psl = NULL, it = 0; it < ntimes; it++) {
1947 		if (psl_mtime[it] > _psl_file_time)
1948 			if ((psl = psl_load_file(psl_fname[it])))
1949 				break;
1950 	}
1951 
1952 	/* if file loading failed or there is no file newer than the builtin data,
1953 	 * then return the builtin data. */
1954 	return psl ? psl : (psl_ctx_t *) psl_builtin();
1955 }
1956