1 #include "mupdf/fitz.h"
2 
3 #include <string.h>
4 #include <errno.h>
5 #include <math.h>
6 #include <float.h>
7 #include <stdlib.h>
8 
9 #ifndef PATH_MAX
10 #define PATH_MAX 4096
11 #endif
12 
13 #ifdef _WIN32
14 #include <windows.h> /* for MultiByteToWideChar etc. */
15 #endif
16 
17 static inline int
fz_tolower(int c)18 fz_tolower(int c)
19 {
20 	if (c >= 'A' && c <= 'Z')
21 		return c + 32;
22 	return c;
23 }
24 
25 size_t
fz_strnlen(const char * s,size_t n)26 fz_strnlen(const char *s, size_t n)
27 {
28 	const char *p = memchr(s, 0, n);
29 	return p ? (size_t) (p - s) : n;
30 }
31 
32 int
fz_strncasecmp(const char * a,const char * b,size_t n)33 fz_strncasecmp(const char *a, const char *b, size_t n)
34 {
35 	if (!n--)
36 		return 0;
37 	for (; *a && *b && n && (*a == *b || fz_tolower(*a) == fz_tolower(*b)); a++, b++, n--)
38 		;
39 	return fz_tolower(*a) - fz_tolower(*b);
40 }
41 
42 int
fz_strcasecmp(const char * a,const char * b)43 fz_strcasecmp(const char *a, const char *b)
44 {
45 	while (fz_tolower(*a) == fz_tolower(*b))
46 	{
47 		if (*a++ == 0)
48 			return 0;
49 		b++;
50 	}
51 	return fz_tolower(*a) - fz_tolower(*b);
52 }
53 
54 char *
fz_strsep(char ** stringp,const char * delim)55 fz_strsep(char **stringp, const char *delim)
56 {
57 	char *ret = *stringp;
58 	if (!ret) return NULL;
59 	if ((*stringp = strpbrk(*stringp, delim)) != NULL)
60 		*((*stringp)++) = '\0';
61 	return ret;
62 }
63 
64 size_t
fz_strlcpy(char * dst,const char * src,size_t siz)65 fz_strlcpy(char *dst, const char *src, size_t siz)
66 {
67 	register char *d = dst;
68 	register const char *s = src;
69 	register size_t n = siz;
70 
71 	/* Copy as many bytes as will fit */
72 	if (n != 0 && --n != 0) {
73 		do {
74 			if ((*d++ = *s++) == 0)
75 				break;
76 		} while (--n != 0);
77 	}
78 
79 	/* Not enough room in dst, add NUL and traverse rest of src */
80 	if (n == 0) {
81 		if (siz != 0)
82 			*d = '\0';		/* NUL-terminate dst */
83 		while (*s++)
84 			;
85 	}
86 
87 	return(s - src - 1);	/* count does not include NUL */
88 }
89 
90 size_t
fz_strlcat(char * dst,const char * src,size_t siz)91 fz_strlcat(char *dst, const char *src, size_t siz)
92 {
93 	register char *d = dst;
94 	register const char *s = src;
95 	register size_t n = siz;
96 	size_t dlen;
97 
98 	/* Find the end of dst and adjust bytes left but don't go past end */
99 	while (*d != '\0' && n-- != 0)
100 		d++;
101 	dlen = d - dst;
102 	n = siz - dlen;
103 
104 	if (n == 0)
105 		return dlen + strlen(s);
106 	while (*s != '\0') {
107 		if (n != 1) {
108 			*d++ = *s;
109 			n--;
110 		}
111 		s++;
112 	}
113 	*d = '\0';
114 
115 	return dlen + (s - src);	/* count does not include NUL */
116 }
117 
118 void
fz_dirname(char * dir,const char * path,size_t n)119 fz_dirname(char *dir, const char *path, size_t n)
120 {
121 	size_t i;
122 
123 	if (!path || !path[0])
124 	{
125 		fz_strlcpy(dir, ".", n);
126 		return;
127 	}
128 
129 	fz_strlcpy(dir, path, n);
130 
131 	i = strlen(dir);
132 	for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
133 	for(; dir[i] != '/'; --i) if (!i) { fz_strlcpy(dir, ".", n); return; }
134 	for(; dir[i] == '/'; --i) if (!i) { fz_strlcpy(dir, "/", n); return; }
135 	dir[i+1] = 0;
136 }
137 
138 #ifdef _WIN32
139 
fz_realpath(const char * path,char buf[PATH_MAX])140 char *fz_realpath(const char *path, char buf[PATH_MAX])
141 {
142 	wchar_t wpath[PATH_MAX];
143 	wchar_t wbuf[PATH_MAX];
144 	int i;
145 	if (!MultiByteToWideChar(CP_UTF8, 0, path, -1, wpath, PATH_MAX))
146 		return NULL;
147 	if (!GetFullPathNameW(wpath, PATH_MAX, wbuf, NULL))
148 		return NULL;
149 	if (!WideCharToMultiByte(CP_UTF8, 0, wbuf, -1, buf, PATH_MAX, NULL, NULL))
150 		return NULL;
151 	for (i=0; buf[i]; ++i)
152 		if (buf[i] == '\\')
153 			buf[i] = '/';
154 	return buf;
155 }
156 
157 #else
158 
fz_realpath(const char * path,char buf[PATH_MAX])159 char *fz_realpath(const char *path, char buf[PATH_MAX])
160 {
161 	return realpath(path, buf);
162 }
163 
164 #endif
165 
ishex(int a)166 static inline int ishex(int a)
167 {
168 	return (a >= 'A' && a <= 'F') ||
169 		(a >= 'a' && a <= 'f') ||
170 		(a >= '0' && a <= '9');
171 }
172 
tohex(int c)173 static inline int tohex(int c)
174 {
175 	if (c >= '0' && c <= '9') return c - '0';
176 	if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
177 	if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
178 	return 0;
179 }
180 
181 char *
fz_urldecode(char * url)182 fz_urldecode(char *url)
183 {
184 	char *s = url;
185 	char *p = url;
186 	while (*s)
187 	{
188 		int c = (unsigned char) *s++;
189 		if (c == '%' && ishex(s[0]) && ishex(s[1]))
190 		{
191 			int a = tohex(*s++);
192 			int b = tohex(*s++);
193 			*p++ = a << 4 | b;
194 		}
195 		else
196 		{
197 			*p++ = c;
198 		}
199 	}
200 	*p = 0;
201 	return url;
202 }
203 
204 void
fz_format_output_path(fz_context * ctx,char * path,size_t size,const char * fmt,int page)205 fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page)
206 {
207 	const char *s, *p;
208 	char num[40];
209 	int i, n;
210 	int z = 0;
211 
212 	for (i = 0; page; page /= 10)
213 		num[i++] = '0' + page % 10;
214 	num[i] = 0;
215 
216 	s = p = strchr(fmt, '%');
217 	if (p)
218 	{
219 		++p;
220 		while (*p >= '0' && *p <= '9')
221 			z = z * 10 + (*p++ - '0');
222 	}
223 	if (p && *p == 'd')
224 	{
225 		++p;
226 	}
227 	else
228 	{
229 		s = p = strrchr(fmt, '.');
230 		if (!p)
231 			s = p = fmt + strlen(fmt);
232 	}
233 
234 	if (z < 1)
235 		z = 1;
236 	while (i < z && i < (int)sizeof num)
237 		num[i++] = '0';
238 	n = s - fmt;
239 	if (n + i + strlen(p) >= size)
240 		fz_throw(ctx, FZ_ERROR_GENERIC, "path name buffer overflow");
241 	memcpy(path, fmt, n);
242 	while (i > 0)
243 		path[n++] = num[--i];
244 	fz_strlcpy(path + n, p, size - n);
245 }
246 
247 #define SEP(x) ((x)=='/' || (x) == 0)
248 
249 char *
fz_cleanname(char * name)250 fz_cleanname(char *name)
251 {
252 	char *p, *q, *dotdot;
253 	int rooted;
254 
255 	rooted = name[0] == '/';
256 
257 	/*
258 	 * invariants:
259 	 *		p points at beginning of path element we're considering.
260 	 *		q points just past the last path element we wrote (no slash).
261 	 *		dotdot points just past the point where .. cannot backtrack
262 	 *				any further (no slash).
263 	 */
264 	p = q = dotdot = name + rooted;
265 	while (*p)
266 	{
267 		if(p[0] == '/') /* null element */
268 			p++;
269 		else if (p[0] == '.' && SEP(p[1]))
270 			p += 1; /* don't count the separator in case it is nul */
271 		else if (p[0] == '.' && p[1] == '.' && SEP(p[2]))
272 		{
273 			p += 2;
274 			if (q > dotdot) /* can backtrack */
275 			{
276 				while(--q > dotdot && *q != '/')
277 					;
278 			}
279 			else if (!rooted) /* /.. is / but ./../ is .. */
280 			{
281 				if (q != name)
282 					*q++ = '/';
283 				*q++ = '.';
284 				*q++ = '.';
285 				dotdot = q;
286 			}
287 		}
288 		else /* real path element */
289 		{
290 			if (q != name+rooted)
291 				*q++ = '/';
292 			while ((*q = *p) != '/' && *q != 0)
293 				p++, q++;
294 		}
295 	}
296 
297 	if (q == name) /* empty string is really "." */
298 		*q++ = '.';
299 	*q = '\0';
300 	return name;
301 }
302 
303 enum
304 {
305 	UTFmax = 4, /* maximum bytes per rune */
306 	Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */
307 	Runeself = 0x80, /* rune and UTF sequences are the same (<) */
308 	Runeerror = 0xFFFD, /* decoding error in UTF */
309 	Runemax = 0x10FFFF, /* maximum rune value */
310 };
311 
312 enum
313 {
314 	Bit1 = 7,
315 	Bitx = 6,
316 	Bit2 = 5,
317 	Bit3 = 4,
318 	Bit4 = 3,
319 	Bit5 = 2,
320 
321 	T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
322 	Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
323 	T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
324 	T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
325 	T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
326 	T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
327 
328 	Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
329 	Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
330 	Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
331 	Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0001 1111 1111 1111 1111 1111 */
332 
333 	Maskx = (1<<Bitx)-1,	/* 0011 1111 */
334 	Testx = Maskx ^ 0xFF,	/* 1100 0000 */
335 
336 	Bad = Runeerror,
337 };
338 
339 int
fz_chartorune(int * rune,const char * str)340 fz_chartorune(int *rune, const char *str)
341 {
342 	int c, c1, c2, c3;
343 	int l;
344 
345 	/*
346 	 * one character sequence
347 	 *	00000-0007F => T1
348 	 */
349 	c = *(const unsigned char*)str;
350 	if(c < Tx) {
351 		*rune = c;
352 		return 1;
353 	}
354 
355 	/*
356 	 * two character sequence
357 	 *	0080-07FF => T2 Tx
358 	 */
359 	c1 = *(const unsigned char*)(str+1) ^ Tx;
360 	if(c1 & Testx)
361 		goto bad;
362 	if(c < T3) {
363 		if(c < T2)
364 			goto bad;
365 		l = ((c << Bitx) | c1) & Rune2;
366 		if(l <= Rune1)
367 			goto bad;
368 		*rune = l;
369 		return 2;
370 	}
371 
372 	/*
373 	 * three character sequence
374 	 *	0800-FFFF => T3 Tx Tx
375 	 */
376 	c2 = *(const unsigned char*)(str+2) ^ Tx;
377 	if(c2 & Testx)
378 		goto bad;
379 	if(c < T4) {
380 		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
381 		if(l <= Rune2)
382 			goto bad;
383 		*rune = l;
384 		return 3;
385 	}
386 
387 	/*
388 	 * four character sequence (21-bit value)
389 	 *	10000-1FFFFF => T4 Tx Tx Tx
390 	 */
391 	c3 = *(const unsigned char*)(str+3) ^ Tx;
392 	if (c3 & Testx)
393 		goto bad;
394 	if (c < T5) {
395 		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
396 		if (l <= Rune3)
397 			goto bad;
398 		*rune = l;
399 		return 4;
400 	}
401 	/*
402 	 * Support for 5-byte or longer UTF-8 would go here, but
403 	 * since we don't have that, we'll just fall through to bad.
404 	 */
405 
406 	/*
407 	 * bad decoding
408 	 */
409 bad:
410 	*rune = Bad;
411 	return 1;
412 }
413 
414 int
fz_runetochar(char * str,int rune)415 fz_runetochar(char *str, int rune)
416 {
417 	/* Runes are signed, so convert to unsigned for range check. */
418 	unsigned int c = (unsigned int)rune;
419 
420 	/*
421 	 * one character sequence
422 	 *	00000-0007F => 00-7F
423 	 */
424 	if(c <= Rune1) {
425 		str[0] = c;
426 		return 1;
427 	}
428 
429 	/*
430 	 * two character sequence
431 	 *	0080-07FF => T2 Tx
432 	 */
433 	if(c <= Rune2) {
434 		str[0] = T2 | (c >> 1*Bitx);
435 		str[1] = Tx | (c & Maskx);
436 		return 2;
437 	}
438 
439 	/*
440 	 * If the Rune is out of range, convert it to the error rune.
441 	 * Do this test here because the error rune encodes to three bytes.
442 	 * Doing it earlier would duplicate work, since an out of range
443 	 * Rune wouldn't have fit in one or two bytes.
444 	 */
445 	if (c > Runemax)
446 		c = Runeerror;
447 
448 	/*
449 	 * three character sequence
450 	 *	0800-FFFF => T3 Tx Tx
451 	 */
452 	if (c <= Rune3) {
453 		str[0] = T3 | (c >> 2*Bitx);
454 		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
455 		str[2] = Tx | (c & Maskx);
456 		return 3;
457 	}
458 
459 	/*
460 	 * four character sequence (21-bit value)
461 	 *	10000-1FFFFF => T4 Tx Tx Tx
462 	 */
463 	str[0] = T4 | (c >> 3*Bitx);
464 	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
465 	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
466 	str[3] = Tx | (c & Maskx);
467 	return 4;
468 }
469 
470 int
fz_runelen(int c)471 fz_runelen(int c)
472 {
473 	char str[10];
474 	return fz_runetochar(str, c);
475 }
476 
477 int
fz_utflen(const char * s)478 fz_utflen(const char *s)
479 {
480 	int c, n, rune;
481 	n = 0;
482 	for(;;) {
483 		c = *(const unsigned char*)s;
484 		if(c < Runeself) {
485 			if(c == 0)
486 				return n;
487 			s++;
488 		} else
489 			s += fz_chartorune(&rune, s);
490 		n++;
491 	}
492 	return 0;
493 }
494 
fz_atof(const char * s)495 float fz_atof(const char *s)
496 {
497 	float result;
498 
499 	if (s == NULL)
500 		return 0;
501 
502 	errno = 0;
503 	result = fz_strtof(s, NULL);
504 	if ((errno == ERANGE && result == 0) || isnan(result))
505 		/* Return 1.0 on  underflow, as it's a small known value that won't cause a divide by 0.  */
506 		return 1;
507 	result = fz_clamp(result, -FLT_MAX, FLT_MAX);
508 	return result;
509 }
510 
fz_atoi(const char * s)511 int fz_atoi(const char *s)
512 {
513 	if (s == NULL)
514 		return 0;
515 	return atoi(s);
516 }
517 
fz_atoi64(const char * s)518 int64_t fz_atoi64(const char *s)
519 {
520 	if (s == NULL)
521 		return 0;
522 	return atoll(s);
523 }
524 
fz_is_page_range(fz_context * ctx,const char * s)525 int fz_is_page_range(fz_context *ctx, const char *s)
526 {
527 	/* TODO: check the actual syntax... */
528 	while (*s)
529 	{
530 		if ((*s < '0' || *s > '9') && *s != 'N' && *s != '-' && *s != ',')
531 			return 0;
532 		s++;
533 	}
534 	return 1;
535 }
536 
fz_parse_page_range(fz_context * ctx,const char * s,int * a,int * b,int n)537 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n)
538 {
539 	if (!s || !s[0])
540 		return NULL;
541 
542 	if (s[0] == ',')
543 		s += 1;
544 
545 	if (s[0] == 'N')
546 	{
547 		*a = n;
548 		s += 1;
549 	}
550 	else
551 		*a = strtol(s, (char**)&s, 10);
552 
553 	if (s[0] == '-')
554 	{
555 		if (s[1] == 'N')
556 		{
557 			*b = n;
558 			s += 2;
559 		}
560 		else
561 			*b = strtol(s+1, (char**)&s, 10);
562 	}
563 	else
564 		*b = *a;
565 
566 	*a = fz_clampi(*a, 1, n);
567 	*b = fz_clampi(*b, 1, n);
568 
569 	return s;
570 }
571 
572 /* memmem from musl */
573 
574 #define MAX(a,b) ((a)>(b)?(a):(b))
575 
576 #define BITOP(a,b,op) \
577  ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))
578 
twobyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)579 static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
580 {
581 	uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1];
582 	for (h++, k--; k; k--, hw = hw<<8 | *++h)
583 		if (hw == nw) return (char *)h-1;
584 	return 0;
585 }
586 
threebyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)587 static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
588 {
589 	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8;
590 	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8;
591 	for (h+=2, k-=2; k; k--, hw = (hw|*++h)<<8)
592 		if (hw == nw) return (char *)h-2;
593 	return 0;
594 }
595 
fourbyte_memmem(const unsigned char * h,size_t k,const unsigned char * n)596 static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n)
597 {
598 	uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3];
599 	uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3];
600 	for (h+=3, k-=3; k; k--, hw = hw<<8 | *++h)
601 		if (hw == nw) return (char *)h-3;
602 	return 0;
603 }
604 
twoway_memmem(const unsigned char * h,const unsigned char * z,const unsigned char * n,size_t l)605 static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l)
606 {
607 	size_t i, ip, jp, k, p, ms, p0, mem, mem0;
608 	size_t byteset[32 / sizeof(size_t)] = { 0 };
609 	size_t shift[256];
610 
611 	/* Computing length of needle and fill shift table */
612 	for (i=0; i<l; i++)
613 		BITOP(byteset, n[i], |=), shift[n[i]] = i+1;
614 
615 	/* Compute maximal suffix */
616 	ip = -1; jp = 0; k = p = 1;
617 	while (jp+k<l) {
618 		if (n[ip+k] == n[jp+k]) {
619 			if (k == p) {
620 				jp += p;
621 				k = 1;
622 			} else k++;
623 		} else if (n[ip+k] > n[jp+k]) {
624 			jp += k;
625 			k = 1;
626 			p = jp - ip;
627 		} else {
628 			ip = jp++;
629 			k = p = 1;
630 		}
631 	}
632 	ms = ip;
633 	p0 = p;
634 
635 	/* And with the opposite comparison */
636 	ip = -1; jp = 0; k = p = 1;
637 	while (jp+k<l) {
638 		if (n[ip+k] == n[jp+k]) {
639 			if (k == p) {
640 				jp += p;
641 				k = 1;
642 			} else k++;
643 		} else if (n[ip+k] < n[jp+k]) {
644 			jp += k;
645 			k = 1;
646 			p = jp - ip;
647 		} else {
648 			ip = jp++;
649 			k = p = 1;
650 		}
651 	}
652 	if (ip+1 > ms+1) ms = ip;
653 	else p = p0;
654 
655 	/* Periodic needle? */
656 	if (memcmp(n, n+p, ms+1)) {
657 		mem0 = 0;
658 		p = MAX(ms, l-ms-1) + 1;
659 	} else mem0 = l-p;
660 	mem = 0;
661 
662 	/* Search loop */
663 	for (;;) {
664 		/* If remainder of haystack is shorter than needle, done */
665 		if ((size_t)(z-h) < l) return 0;
666 
667 		/* Check last byte first; advance by shift on mismatch */
668 		if (BITOP(byteset, h[l-1], &)) {
669 			k = l-shift[h[l-1]];
670 			if (k) {
671 				if (mem0 && mem && k < p) k = l-p;
672 				h += k;
673 				mem = 0;
674 				continue;
675 			}
676 		} else {
677 			h += l;
678 			mem = 0;
679 			continue;
680 		}
681 
682 		/* Compare right half */
683 		for (k=MAX(ms+1,mem); k<l && n[k] == h[k]; k++);
684 		if (k < l) {
685 			h += k-ms;
686 			mem = 0;
687 			continue;
688 		}
689 		/* Compare left half */
690 		for (k=ms+1; k>mem && n[k-1] == h[k-1]; k--);
691 		if (k <= mem) return (char *)h;
692 		h += p;
693 		mem = mem0;
694 	}
695 }
696 
fz_memmem(const void * h0,size_t k,const void * n0,size_t l)697 void *fz_memmem(const void *h0, size_t k, const void *n0, size_t l)
698 {
699 	const unsigned char *h = h0, *n = n0;
700 
701 	/* Return immediately on empty needle */
702 	if (!l) return (void *)h;
703 
704 	/* Return immediately when needle is longer than haystack */
705 	if (k<l) return 0;
706 
707 	/* Use faster algorithms for short needles */
708 	h = memchr(h0, *n, k);
709 	if (!h || l==1) return (void *)h;
710 	k -= h - (const unsigned char *)h0;
711 	if (k<l) return 0;
712 	if (l==2) return twobyte_memmem(h, k, n);
713 	if (l==3) return threebyte_memmem(h, k, n);
714 	if (l==4) return fourbyte_memmem(h, k, n);
715 
716 	return twoway_memmem(h, h+k, n, l);
717 }
718