xref: /illumos-gate/usr/src/common/util/string.c (revision 4870e0a7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Implementations of the functions described in vsnprintf(3C) and string(3C),
28  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
29  * these functions match the section 3C manpages.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/null.h>
34 #include <sys/varargs.h>
35 
36 #if defined(_KERNEL)
37 #include <sys/systm.h>
38 #include <sys/debug.h>
39 #elif !defined(_BOOT)
40 #include <string.h>
41 #endif
42 
43 #include "memcpy.h"
44 #include "string.h"
45 
46 /*
47  * We don't need these for x86 boot or kmdb.
48  */
49 #if !defined(_KMDB) && (!defined(_BOOT) || defined(__sparc))
50 
/*
 * Emit one output character.  'bufp' is advanced unconditionally, so that
 * (bufp - buf) always counts the characters the format has produced so
 * far, but the byte is stored only while it still lies within the first
 * 'buflen' bytes of 'buf'.  The argument is evaluated at most once (and
 * not at all once the buffer is full).  The do/while (0) wrapper makes
 * each use a single statement, so the macro can sit safely in the body of
 * an unbraced if/else or loop.
 */
#define	ADDCHAR(c)	\
	do { if (bufp++ - buf < buflen) bufp[-1] = (c); } while (0)

/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, args> as possible.  Provided 'buflen' is non-zero,
 * the result is always null-terminated, so the maximum string length is
 * 'buflen - 1'.  Returns the number of bytes that would be necessary to
 * render the entire string, not including the null terminator (just like
 * vsnprintf(3C)).  To determine buffer size in advance, use
 * vsnprintf(NULL, 0, fmt, args) + 1.  A 'buflen' that is negative when
 * viewed as an ssize_t is treated as zero.
 *
 * The caller's va_list is copied first, so 'aargs' itself is never
 * advanced by this function.
 *
 * Supported directives:
 *
 *	flags		'-' (left-justify; only %s pays attention to it)
 *			'0' (pad numbers with zeroes instead of blanks)
 *	width		decimal digits, or '*' to fetch an int argument
 *	precision	'.' followed by decimal digits or '*'; it only
 *			affects %s, and a precision of 0 means "none"
 *	length		hh, h, l, ll, j, z and t
 *	conversions	d u o x p c s b %
 *
 * An upper-case directive letter is folded to lower case and selects
 * upper-case hexadecimal digits.  A NULL %s argument is rendered as
 * "<null string>".  A %s with a precision is padded out to at least
 * that precision.  Unrecognized conversion characters are dropped
 * without consuming an argument.
 *
 * %b takes an unsigned int followed by a char * control string.  The
 * first byte of the control string is the base in which to print the
 * value; the rest is a sequence of groups, each a byte below 32 giving a
 * 1-based bit number followed by that bit's name (bytes >= 32).  The
 * names of the bits that are set are appended as "<name,name,...>".
 *
 * There is no support for floating point, and the C locale is assumed.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul, tmp;
	char *bufp = buf;	/* current buffer pointer */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	char *digits, *sp, *bs;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		/* ordinary characters are copied straight through */
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		/* start of a directive: reset all per-directive state */
		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
next_fmt:
		/* a format that ends in the middle of a directive just stops */
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		/*
		 * Flags, width, precision and length modifiers update the
		 * state and jump back to next_fmt.  Conversions break out
		 * of the switch; the integer ones do so with 'base' set.
		 */
		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* a leading zero before any '.' asks for zero padding */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			/* gather the whole run of digits as one number */
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			/* after a '.' the number is the precision */
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'j':
			/* intmax_t/uintmax_t: fetched as a 64-bit argument */
			l_count = 2;
			goto next_fmt;
		case 'z':
		case 't':
			/*
			 * size_t (and ptrdiff_t, assumed to be just as
			 * wide): fetched as a long, or as a 64-bit
			 * argument should size_t be wider than long.
			 */
			l_count = (sizeof (size_t) > sizeof (long)) ? 2 : 1;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			/* pointers are fetched as unsigned long */
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/*
			 * The value is always fetched as an unsigned int.
			 * 'base' is only a non-zero placeholder here; the
			 * real base comes from the control string below.
			 */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				/* never look at more than 'prec' bytes */
				transfer_count = strnlen(sp, prec);
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else
				transfer_count = strlen(sp);
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			/* padding goes before or after, never both */
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			/* ADDCHAR() evaluates arg at most once */
			while (transfer_count-- > 0)
				ADDCHAR(*sp++);
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		}

		/* everything except the integer conversions is finished */
		if (base == 0)
			continue;

		/*
		 * Fetch the integer argument according to its length
		 * modifier and widen it to 64 bits.  The h/hh forms arrive
		 * promoted to int and are narrowed back before widening.
		 */
		if (h_count == 0 && l_count == 0)
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		else if (l_count > 1)
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		else if (l_count > 0)
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		else if (h_count > 1)
			if (sign)
				ul = (int64_t)((char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		else if (h_count > 0)
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));

		/*
		 * From here on 'sign' means "a minus sign has to be
		 * printed" and 'ul' holds the magnitude.
		 */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			bs = va_arg(args, char *);
			base = *bs++;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			/* take one column off the width for every digit */
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/*
		 * The sign goes in front of zero padding but after blank
		 * padding; either way one column is kept back for it.
		 */
		if (sign && pad == '0')
			ADDCHAR('-');
		while (width-- > sign)
			ADDCHAR(pad);
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* generate the digits least significant first ... */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		/* ... and emit them in the opposite order */
		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			/* 'c' is a bit number at the top of each pass */
			while (c != 0) {
				if (ul & (1 << (c - 1))) {
					/* bit is set: print its name */
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					/* bit is clear: skip its name */
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* turn the trailing ',' into the closing '>' */
				bufp--;
				ADDCHAR('>');
			}
		}
	}
	/*
	 * 'c' is '\0' here, since both ways out of the loop are taken on
	 * reading the end of the format.  Terminate the output after the
	 * last character, or in the last byte of the buffer if the output
	 * was truncated.
	 */
	if (bufp - buf < buflen)
		bufp[0] = c;
	else if (buflen != 0)
		buf[buflen - 1] = c;

	va_end(args);

	return (bufp - buf);
}
310 
/*
 * Format into 'buf', which holds 'buflen' bytes, under the control of
 * 'fmt' and the arguments that follow it.  All of the work is done by
 * vsnprintf(); its return value is passed back unchanged.
 *
 * The format string is the third parameter, and that is the position the
 * lint directive below has to name for the arguments to be checked.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
323 
324 #if defined(_BOOT) && defined(__sparc)
325 /*
326  * The sprintf() and vsprintf() routines aren't shared with the kernel because
327  * the DDI mandates that they return the buffer rather than its length.
328  */
/*
 * Format into 'buf' without a caller-supplied size limit: vsnprintf() is
 * handed INT_MAX as the buffer size, so the caller must provide enough
 * room.  Returns the length of the resulting string as measured by
 * strlen().
 */
/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int outlen;

	va_start(ap, fmt);
	(void) vsnprintf(buf, INT_MAX, fmt, ap);
	va_end(ap);

	outlen = strlen(buf);
	return (outlen);
}
341 
/*
 * va_list flavor of sprintf(): format into 'buf' with INT_MAX given to
 * vsnprintf() as the buffer size, then return the length of the resulting
 * string as measured by strlen().
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	int outlen;

	(void) vsnprintf(buf, INT_MAX, fmt, args);
	outlen = strlen(buf);

	return (outlen);
}
348 #endif /* _BOOT && __sparc */
349 
350 #endif /* !_KMDB && (!_BOOT || __sparc) */
351 
/*
 * Append a copy of the string s2, terminator included, to the end of the
 * string s1.  No bounds are checked.  Returns s1.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;

	/* find the NUL that currently ends s1 */
	while (*tail != '\0')
		tail++;

	/* copy s2 on top of it, up to and including s2's own NUL */
	do {
		*tail++ = *s2;
	} while (*s2++ != '\0');

	return (s1);
}
364 
/*
 * Return a pointer to the first byte of the string sp that equals c
 * (converted to char), or NULL if there is none.  The terminating NUL
 * counts as part of the string, so searching for '\0' finds it.
 */
char *
strchr(const char *sp, int c)
{
	const char want = (char)c;

	for (;; sp++) {
		if (*sp == want)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
374 
/*
 * Compare the strings s1 and s2 byte by byte.  Returns 0 if they are
 * equal; otherwise returns the difference between the first pair of
 * bytes that differ, both taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; *a == *b; a++, b++) {
		/* both strings ended at the same place */
		if (*a == '\0')
			return (0);
	}
	return (*a - *b);
}
383 
/*
 * Compare at most n bytes of the strings s1 and s2.  Returns 0 if no
 * difference is found within n bytes or before the end of the strings;
 * otherwise returns the difference between the first pair of bytes that
 * differ, both taken as unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	/* a string always matches itself; skip the scan */
	if (s1 == s2)
		return (0);

	for (; n != 0; n--, a++, b++) {
		if (*a != *b)
			return (*a - *b);
		if (*a == '\0')
			break;
	}
	return (0);
}
395 
/*
 * Case-folding table, meant to be indexed by a byte value from 0 to 255.
 * Every entry holds its own index, except that the entries for 'A'..'Z'
 * (octal 101..132) hold the matching lower-case letters 'a'..'z' (octal
 * 141..172).  The element type is plain char, so the users below view the
 * table through a pointer to unsigned char.
 */
static const char charmap[] = {
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
430 
431 int
432 strcasecmp(const char *s1, const char *s2)
433 {
434 	const unsigned char *cm = (const unsigned char *)charmap;
435 	const unsigned char *us1 = (const unsigned char *)s1;
436 	const unsigned char *us2 = (const unsigned char *)s2;
437 
438 	while (cm[*us1] == cm[*us2++])
439 		if (*us1++ == '\0')
440 			return (0);
441 	return (cm[*us1] - cm[*(us2 - 1)]);
442 }
443 
444 int
445 strncasecmp(const char *s1, const char *s2, size_t n)
446 {
447 	const unsigned char *cm = (const unsigned char *)charmap;
448 	const unsigned char *us1 = (const unsigned char *)s1;
449 	const unsigned char *us2 = (const unsigned char *)s2;
450 
451 	while (n != 0 && cm[*us1] == cm[*us2++]) {
452 		if (*us1++ == '\0')
453 			return (0);
454 		n--;
455 	}
456 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
457 }
458 
/*
 * Copy the string s2, terminator included, to s1.  No bounds are
 * checked.  Returns s1.
 */
char *
strcpy(char *s1, const char *s2)
{
	size_t i;

	/* the loop test itself performs the copy of each byte */
	for (i = 0; (s1[i] = s2[i]) != '\0'; i++)
		continue;

	return (s1);
}
468 
/*
 * Copy the string s2 into the n bytes at s1.  If s2 is shorter than n
 * bytes the remainder of s1 is filled with NULs; if it is n bytes or
 * longer, exactly n bytes are copied and s1 is NOT terminated.
 * Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* copy until n bytes are written or the NUL has been copied */
	while (i < n && (s1[i] = s2[i]) != '\0')
		i++;

	/* zero-fill whatever is left of the n bytes */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
482 
/*
 * Return a pointer to the last byte of the string sp that equals c
 * (converted to char), or NULL if there is none.  The terminating NUL
 * counts as part of the string, so searching for '\0' finds it.
 */
char *
strrchr(const char *sp, int c)
{
	const char want = (char)c;
	char *last = NULL;

	for (;; sp++) {
		/* remember every match; the final one wins */
		if (*sp == want)
			last = (char *)sp;
		if (*sp == '\0')
			return (last);
	}
}
495 
/*
 * Find the first occurrence of the string as2 within the string as1.
 * Returns a pointer to where the match starts, NULL if there is no
 * match, or as1 itself when as2 is NULL or the empty string.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *h, *n;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	for (start = as1; *start != '\0'; start++) {
		/* only positions matching as2's first byte are candidates */
		if (*start != *as2)
			continue;

		/* compare the rest of as2 against what follows 'start' */
		h = start + 1;
		n = as2 + 1;
		while (*n != '\0' && *h == *n) {
			h++;
			n++;
		}

		/* all of as2 was consumed, so this is a match */
		if (*n == '\0')
			return ((char *)start);
	}

	return (NULL);
}
524 
/*
 * Return a pointer to the first byte of 'string' that also appears in
 * 'brkset', or NULL if none of its bytes do.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *m;

	for (;; string++) {
		/* is the current byte one of the break characters? */
		for (m = brkset; *m != '\0'; m++) {
			if (*m == *string)
				return ((char *)string);
		}
		if (*string == '\0')
			return (NULL);
	}
}
539 
/*
 * Append at most n bytes of the string s2 to the end of the string s1,
 * then terminate the result.  No bounds are checked on s1.  Returns s1.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* find the NUL that currently ends s1 */
	while (*tail != '\0')
		tail++;

	/* append until s2 runs out or the n-byte budget is spent */
	while (*s2 != '\0' && n != 0) {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
557 
/*
 * When building with _BOOT or _KMDB defined, supply bcopy() as a macro
 * over memcpy().  Note the argument order: bcopy() takes the source
 * first, memcpy() takes the destination first.
 */
#if defined(_BOOT) || defined(_KMDB)
#define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
#endif
561 
/*
 * Append the string src to the string held in the dstsize-byte buffer
 * dst, truncating if necessary so that the result, terminator included,
 * fits in the buffer.  Returns the length the combined string would have
 * had with unlimited room.  If dst holds no NUL within its first dstsize
 * bytes, nothing is written and dstsize + strlen(src) is returned.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* measure dst, but never look beyond dstsize bytes */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* no terminator inside the buffer: there is nowhere to append */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* space left for new bytes once one is set aside for the NUL */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
583 
/*
 * Copy the string src into the len-byte buffer dst, truncating if
 * necessary; whenever len is non-zero the result is terminated.
 * Returns strlen(src), so a return value >= len means truncation.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);

	if (len != 0) {
		/* keep one byte back for the terminator */
		size_t ncopy = (srclen < len) ? srclen : len - 1;

		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}

	return (srclen);
}
601 
/*
 * Return the length of the leading part of 'string' that is made up
 * solely of bytes found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span;

	for (span = 0; string[span] != '\0'; span++) {
		const char *m = charset;

		while (*m != '\0' && *m != string[span])
			m++;
		/* ran off the end of charset: this byte is not in it */
		if (*m == '\0')
			break;
	}

	return (span);
}
617 
/*
 * Return the length of the leading part of 'string' that contains no
 * byte found in 'charset'.
 */
size_t
strcspn(const char *string, const char *charset)
{
	size_t span;

	for (span = 0; string[span] != '\0'; span++) {
		const char *m = charset;

		while (*m != '\0' && *m != string[span])
			m++;
		/* stopped on a real member of charset: the span ends */
		if (*m != '\0')
			break;
	}

	return (span);
}
633 
/*
 * strsep
 *
 * Split the next token off the string that *stringp points to.  The
 * token runs from *stringp up to the first byte that appears in 'delim',
 * or to the end of the string if there is no such byte.  A delimiter that
 * is found gets overwritten with `\0' and *stringp is moved to the byte
 * after it; if the end of the string is reached instead, *stringp is set
 * to NULL.  The original value of *stringp, i.e. the start of the token,
 * is returned.  Adjacent delimiters therefore yield empty tokens.
 *
 * If *stringp is NULL on entry, strsep() returns NULL and changes nothing.
 *
 * NOTE: This instance is left for in-kernel use. Libraries and programs
 *       should use strsep from libc.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *tok = *stringp;
	char *cur;
	const char *d;

	if (tok == NULL)
		return (NULL);

	for (cur = tok; *cur != '\0'; cur++) {
		for (d = delim; *d != '\0'; d++) {
			if (*d == *cur) {
				/* end the token here, resume after it */
				*cur = '\0';
				*stringp = cur + 1;
				return (tok);
			}
		}
	}

	/* reached the end of the string: this was the last token */
	*stringp = NULL;
	return (tok);
}
676 
677 /*
678  * Unless mentioned otherwise, all of the routines below should be added to
679  * the Solaris DDI as necessary.  For now, only provide them to standalone.
680  */
681 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split a string into tokens separated by bytes from 'sepset'.  The
 * first call passes the string to scan; later calls pass NULL to carry
 * on from where the previous call stopped.  Each call returns the next
 * token, NUL-terminated in place, or NULL when no tokens remain.  The
 * resume position lives in a static variable shared by all callers.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*tok, *end;

	/*
	 * A NULL `string' means resume at the saved position, if any.
	 */
	if (string == NULL)
		string = savept;
	if (string == NULL)
		return (NULL);

	/*
	 * Step over leading separators.  If nothing but separators is
	 * left there is no token to return.
	 */
	tok = string + strspn(string, sepset);
	if (*tok == '\0')
		return (NULL);

	/*
	 * Cut the token at the next separator and note where to resume;
	 * with no separator left, the token runs to the end of the string
	 * and there is nothing to resume.
	 */
	end = strpbrk(tok, sepset);
	if (end != NULL) {
		*end = '\0';
		savept = end + 1;
	} else {
		savept = NULL;
	}

	return (tok);
}
714 
/*
 * Return the number of bytes in the string s, not counting the
 * terminating NUL.
 *
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	/* walk to the terminator; the distance covered is the length */
	while (*end != '\0')
		end++;

	return ((size_t)(end - s));
}
728 
729 #endif /* _BOOT || _KMDB */
730 
/*
 * Returns the number of bytes that precede the terminating NUL in the
 * string argument, but never more than maxlen.  No byte at or beyond
 * s + maxlen is examined.
 */
size_t
strnlen(const char *s, size_t maxlen)
{
	size_t len;

	/* the bound is tested first, so s[maxlen] is never read */
	for (len = 0; len < maxlen && s[len] != '\0'; len++)
		continue;

	return (len);
}
748 
749 
750 #ifdef _KERNEL
/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 *
 * Returns 1 if 'id' is such an identifier and 0 otherwise; the empty
 * string is not valid.  Only the ASCII letters and digits matched by
 * the macros below are accepted.
 */

#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

int
strident_valid(const char *id)
{
	const char *p = id;

	/* the leading character may be anything but a digit */
	if (!(IS_ALPHA(*p) || *p == '_'))
		return (0);

	for (p++; *p != '\0'; p++) {
		if (!(IS_ALPHA(*p) || IS_DIGIT(*p) || *p == '_'))
			return (0);
	}

	return (1);
}
775 
/*
 * Rewrite the string s in place as a valid C identifier: a first
 * character that is neither a letter nor '_', and any later character
 * that is not a letter, digit or '_', is replaced with '_'.  The result
 * is nul-terminated and takes up at most n bytes, terminator included,
 * so a longer string is cut short.  An empty string is left untouched.
 * n must be greater than zero.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;	/* final byte we are allowed to use */
	char *p = s;

	ASSERT(n > 0);

	if (*p == '\0')
		return;

	/* the leading character may not be a digit */
	if (!(IS_ALPHA(*p) || *p == '_'))
		*p = '_';

	while (p < last) {
		p++;
		if (*p == '\0')
			break;
		if (!(IS_ALPHA(*p) || IS_DIGIT(*p) || *p == '_'))
			*p = '_';
	}

	/* terminate at the string's own end or at byte n - 1 */
	*p = '\0';
}
801 
802 #endif	/* _KERNEL */
803