xref: /illumos-gate/usr/src/common/util/string.c (revision 186507a7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Implementations of the functions described in vsnprintf(3C) and string(3C),
31  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
32  * these functions match the section 3C manpages.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/varargs.h>
37 #if defined(_BOOT) || defined(_KMDB)
38 #include <string.h>
39 #else
40 #include <sys/systm.h>
41 #endif
42 #ifdef _KERNEL
43 #include <sys/debug.h>
44 #endif	/* _KERNEL */
45 
46 /*
47  * kmdb has its own *printf routines, and thus doesn't need these versions too.
48  */
49 #if !defined(_KMDB)
50 
/*
 * Emit one character of output.  'bufp' always advances, so that the final
 * 'bufp - buf' is the length of the complete rendering, but the byte is only
 * stored while it still lies within the first 'buflen' bytes of 'buf'.  The
 * argument is evaluated at most once (and not at all once the buffer is
 * full).  The do/while wrapper makes the macro a single statement that is
 * safe to use under an unbraced if/else.
 */
#define	ADDCHAR(c)	do {						\
	if (bufp++ - buf < buflen)					\
		bufp[-1] = (c);						\
} while (0)

/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, args> as possible.  The string will always be
 * null-terminated (when buflen is non-zero), so the maximum string length
 * is 'buflen - 1'.  Returns the number of bytes that would be necessary to
 * render the entire string, not including the null terminator (just like
 * vsnprintf(3C)).  To determine the buffer size in advance, use
 * vsnprintf(NULL, 0, fmt, args) + 1.
 *
 * Supported directives:
 *	flags		'-' (left-align, strings only) and '0' (zero pad)
 *	width		decimal digits, or '*' to take an int argument
 *	precision	'.' followed by decimal digits or '*'; only %s uses it
 *	length		h, hh, l, ll
 *	conversions	d u x o p c s % and b
 *
 * An upper-case conversion letter is folded to lower case and selects
 * upper-case hexadecimal digits.  Unrecognized conversion characters are
 * consumed and produce no output.
 *
 * %b takes an unsigned int followed by a 'char *' description string.  The
 * first byte of the description is the numeric base used to print the value;
 * it is followed by groups consisting of a bit number byte (less than 32,
 * 1-origin) and a name made of bytes >= 32.  The names of the bits that are
 * set are appended as "<name,name>".
 *
 * Note that a %s precision also acts as a minimum field width here: a string
 * shorter than the precision is padded with blanks.
 *
 * There is no support for floating point, and the C locale is assumed.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul, tmp;
	char *bufp = buf;	/* current buffer pointer */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	const char *digits;
	char *sp, *bs;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	/* A size that is negative when viewed as signed means "no room". */
	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		/* Reset the per-directive state. */
		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
		bs = NULL;
next_fmt:
		/* A format that ends in mid-directive just stops here. */
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* a leading zero before any '.' is the pad flag */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			/* "%*" supplies the width, "%.*" the precision */
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			/* pointers are fetched as unsigned long */
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/* placeholder; the real base comes from the argument */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			/*
			 * A char argument is promoted to int when passed
			 * through "...", so it must be fetched as an int.
			 */
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				/*
				 * Never look at more than 'prec' bytes, so
				 * that a string which is not null-terminated
				 * can be printed with an explicit precision.
				 */
				transfer_count = 0;
				while (transfer_count < prec &&
				    sp[transfer_count] != '\0')
					transfer_count++;
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else {
				transfer_count = strlen(sp);
			}
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			/* ADDCHAR() evaluates arg at most once */
			while (transfer_count-- > 0)
				ADDCHAR(*sp++);
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		default:
			/* unknown conversion: ignored */
			break;
		}

		/* Everything that is not a numeric conversion is done. */
		if (base == 0)
			continue;

		/*
		 * Fetch the integer argument.  Types narrower than int are
		 * promoted to int when passed through "...", so the h and hh
		 * variants are fetched as int and then narrowed.
		 */
		if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			if (sign)
				ul = (int64_t)(signed char)va_arg(args, int);
			else
				ul = (int64_t)(unsigned char)va_arg(args, int);
		} else if (h_count > 0) {
			if (sign)
				ul = (int64_t)(short)va_arg(args, int);
			else
				ul = (int64_t)(unsigned short)va_arg(args, int);
		} else {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		}

		/* From here on 'sign' means "a minus sign must be printed". */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			/* the first byte of the description is the base */
			bs = va_arg(args, char *);
			base = *bs++;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			/* take the number of digits off the field width */
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/* The sign goes before zero padding but after blank padding. */
		if (sign && pad == '0')
			ADDCHAR('-');
		while (width-- > sign)
			ADDCHAR(pad);
		if (sign && pad == ' ')
			ADDCHAR('-');

		/* Generate the digits in reverse, then emit them in order. */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			while (c != 0) {
				if (ul & ((uint64_t)1 << (c - 1))) {
					if (any++ == 0)
						ADDCHAR('<');
					while ((c = *bs++) >= 32)
						ADDCHAR(c);
					ADDCHAR(',');
				} else {
					/* bit is clear: skip its name */
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* replace the trailing ',' with '>' */
				bufp--;
				ADDCHAR('>');
			}
		}
	}

	/*
	 * Terminate the string: at the current position if it is inside the
	 * buffer, otherwise in the last byte of a non-empty buffer.
	 */
	if (bufp - buf < buflen)
		bufp[0] = '\0';
	else if (buflen != 0)
		buf[buflen - 1] = '\0';

	va_end(args);

	return (bufp - buf);
}
307 
/*
 * Variadic front end to vsnprintf(): format at most 'buflen' bytes
 * (including the terminating null) into 'buf' and return whatever
 * vsnprintf() returns for the same format and arguments.
 *
 * The format string is the third argument, which is what the lint
 * directive below has to name.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
320 
321 #if defined(_BOOT)
/*
 * sprintf() and vsprintf() are provided here only for the standalone
 * environment: they are not shared with the kernel because the DDI mandates
 * that the kernel versions return the buffer rather than its length.
 *
 * Both format into 'buf' with an effective size limit of INT_MAX and return
 * the length of the resulting string as measured by strlen().
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	int len;

	(void) vsnprintf(buf, INT_MAX, fmt, args);
	len = strlen(buf);

	return (len);
}

/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int len;

	/* Collect the variable arguments and let vsprintf() do the work. */
	va_start(ap, fmt);
	len = vsprintf(buf, fmt, ap);
	va_end(ap);

	return (len);
}
345 #endif
346 
347 #endif /* !_KMDB */
348 
/*
 * Append a copy of the string s2, terminating null included, to the end of
 * the string s1 and return s1.  No bounds checking is performed.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;
	char ch;

	/* Find the terminating null of the destination. */
	while (*tail != '\0')
		tail++;

	/* Copy the source over it, stopping once the null has been copied. */
	do {
		ch = *s2++;
		*tail++ = ch;
	} while (ch != '\0');

	return (s1);
}
361 
/*
 * Return a pointer to the first occurrence of 'c' (converted to char) in the
 * string 'sp', or NULL if it does not occur.  The terminating null is part
 * of the string, so searching for '\0' returns a pointer to it.
 */
char *
strchr(const char *sp, int c)
{
	const char want = (char)c;

	for (;; sp++) {
		if (*sp == want)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
371 
/*
 * Compare two strings.  Returns zero if they are equal; otherwise returns
 * the difference between the first pair of bytes that differ, with both
 * bytes taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	for (; *s1 == *s2; s1++, s2++) {
		/* equal bytes, and both are the terminator */
		if (*s1 == '\0')
			return (0);
	}
	return (*(unsigned char *)s1 - *(unsigned char *)s2);
}
380 
/*
 * Compare at most 'n' bytes of two strings.  Returns zero if the strings
 * are the same object, if they agree over the first 'n' bytes, or if they
 * agree up to and including a terminating null; otherwise returns the
 * difference between the first pair of differing bytes, taken as unsigned
 * char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	if (s1 == s2)
		return (0);

	for (; n != 0; n--, s1++, s2++) {
		if (*s1 != *s2)
			return (*(unsigned char *)s1 - *(unsigned char *)s2);
		if (*s1 == '\0')
			return (0);
	}

	/* the first n bytes all matched */
	return (0);
}
392 
/*
 * Case-folding table indexed by byte value.  Every entry holds its own
 * index except for the entries at the upper-case ASCII letters 'A'-'Z',
 * which hold the corresponding lower-case letter.  Each row below covers
 * eight consecutive byte values, starting at the value noted on its right.
 */
static const char charmap[] = {
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', /* 000 */
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', /* 010 */
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', /* 020 */
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', /* 030 */
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', /* 040 */
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', /* 050 */
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', /* 060 */
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', /* 070 */
	'\100', 'a',    'b',    'c',    'd',    'e',    'f',    'g',    /* 100 */
	'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',    /* 110 */
	'p',    'q',    'r',    's',    't',    'u',    'v',    'w',    /* 120 */
	'x',    'y',    'z',    '\133', '\134', '\135', '\136', '\137', /* 130 */
	'\140', 'a',    'b',    'c',    'd',    'e',    'f',    'g',    /* 140 */
	'h',    'i',    'j',    'k',    'l',    'm',    'n',    'o',    /* 150 */
	'p',    'q',    'r',    's',    't',    'u',    'v',    'w',    /* 160 */
	'x',    'y',    'z',    '\173', '\174', '\175', '\176', '\177', /* 170 */
	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207', /* 200 */
	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217', /* 210 */
	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227', /* 220 */
	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237', /* 230 */
	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247', /* 240 */
	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257', /* 250 */
	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267', /* 260 */
	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277', /* 270 */
	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307', /* 300 */
	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317', /* 310 */
	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327', /* 320 */
	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337', /* 330 */
	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347', /* 340 */
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357', /* 350 */
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367', /* 360 */
	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377', /* 370 */
};
427 
428 int
429 strcasecmp(const char *s1, const char *s2)
430 {
431 	const unsigned char *cm = (const unsigned char *)charmap;
432 	const unsigned char *us1 = (const unsigned char *)s1;
433 	const unsigned char *us2 = (const unsigned char *)s2;
434 
435 	while (cm[*us1] == cm[*us2++])
436 		if (*us1++ == '\0')
437 			return (0);
438 	return (cm[*us1] - cm[*(us2 - 1)]);
439 }
440 
441 int
442 strncasecmp(const char *s1, const char *s2, size_t n)
443 {
444 	const unsigned char *cm = (const unsigned char *)charmap;
445 	const unsigned char *us1 = (const unsigned char *)s1;
446 	const unsigned char *us2 = (const unsigned char *)s2;
447 
448 	while (n != 0 && cm[*us1] == cm[*us2++]) {
449 		if (*us1++ == '\0')
450 			return (0);
451 		n--;
452 	}
453 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
454 }
455 
/*
 * Copy the string s2, terminating null included, to s1 and return s1.
 * No bounds checking is performed.
 */
char *
strcpy(char *s1, const char *s2)
{
	char *out = s1;
	char ch;

	do {
		ch = *s2++;
		*out++ = ch;
	} while (ch != '\0');

	return (s1);
}
465 
/*
 * Copy s2 to s1, writing exactly 'n' bytes: bytes are copied from s2 until
 * 'n' bytes have been written or the terminating null has been copied, and
 * any remainder of the 'n' bytes is filled with nulls.  If s2 is 'n' bytes
 * or longer, the result is not null-terminated.  Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	char *out = s1;

	/* Copy phase: stops after the null or when the count runs out. */
	while (n != 0) {
		n--;
		if ((*out++ = *s2++) == '\0')
			break;
	}

	/* Padding phase: null-fill whatever is left of the n bytes. */
	while (n != 0) {
		*out++ = '\0';
		n--;
	}

	return (s1);
}
479 
/*
 * Return a pointer to the last occurrence of 'c' (converted to char) in the
 * string 'sp', or NULL if it does not occur.  The terminating null is part
 * of the string, so searching for '\0' returns a pointer to it.
 */
char *
strrchr(const char *sp, int c)
{
	const char want = (char)c;
	char *last = NULL;

	for (;; sp++) {
		if (*sp == want)
			last = (char *)sp;
		if (*sp == '\0')
			break;
	}

	return (last);
}
492 
/*
 * Return a pointer to the first occurrence of the string as2 within the
 * string as1, or NULL if there is none.  If as2 is NULL or empty, as1 itself
 * is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start;	/* candidate position in as1 */
	const char *hay, *pat;	/* cursors used to verify a candidate */

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	for (start = as1; *start != '\0'; start++) {
		if (*start != *as2)
			continue;

		/* First byte matches; check the rest of the pattern. */
		hay = start + 1;
		pat = as2 + 1;
		while (*pat != '\0' && *hay == *pat) {
			hay++;
			pat++;
		}
		if (*pat == '\0')
			return ((char *)start);
	}

	return (NULL);
}
521 
/*
 * Return a pointer to the first byte in 'string' that also appears in
 * 'brkset', or NULL if 'string' contains none of the bytes in 'brkset'.
 * The terminating nulls are never treated as a match.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *brk;

	for (; *string != '\0'; string++) {
		for (brk = brkset; *brk != '\0'; brk++) {
			if (*brk == *string)
				return ((char *)string);
		}
	}

	return (NULL);
}
536 
/*
 * Append at most 'n' bytes of the string s2 to the end of the string s1,
 * always add a terminating null, and return s1.  No more than 'n' bytes of
 * s2 are ever examined, so s2 may be an array of 'n' bytes without a
 * terminating null.  The destination must have room for up to 'n' + 1
 * additional bytes; no bounds checking is performed.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* Find the terminating null of the destination. */
	while (*tail != '\0')
		tail++;

	/*
	 * The count is tested before each source byte is fetched, so that
	 * s2[n] is never read.
	 */
	for (; n != 0 && *s2 != '\0'; n--)
		*tail++ = *s2++;
	*tail = '\0';

	return (s1);
}
554 
/*
 * When building with _BOOT or _KMDB defined, bcopy() is expressed in terms
 * of memcpy().  Note the argument order: bcopy() takes the source first.
 */
#if defined(_BOOT) || defined(_KMDB)
#define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
#endif

/*
 * Append the string 'src' to the string in 'dst', where 'dstsize' is the
 * full size of the 'dst' buffer.  The result is truncated if necessary so
 * that it fits and is null-terminated.  If no terminating null is found
 * within the first 'dstsize' bytes of 'dst', the buffer is left untouched.
 *
 * Returns the length of the string that was being built: the length of the
 * initial 'dst' string (at most 'dstsize') plus the length of 'src'.  A
 * return value of 'dstsize' or more means the result did not fit.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t ncopy;

	/* Measure the existing string without looking beyond the buffer. */
	while (dstlen != dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* No terminator inside the buffer: nothing can be appended. */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* Copy what fits, always leaving one byte for the terminator. */
	if (dstlen + srclen >= dstsize)
		ncopy = dstsize - dstlen - 1;
	else
		ncopy = srclen;
	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
579 
/*
 * Copy the string 'src' into the 'len'-byte buffer 'dst', truncating it if
 * necessary so that the result fits and is null-terminated.  Nothing is
 * written when 'len' is zero.  Returns the length of 'src'; a return value
 * of 'len' or more means the copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);
	size_t ncopy;

	if (len != 0) {
		/* leave room for the terminator */
		ncopy = (srclen < len) ? srclen : len - 1;
		bcopy(src, dst, ncopy);
		dst[ncopy] = '\0';
	}

	return (srclen);
}
597 
/*
 * Return the length of the initial segment of 'string' that consists
 * entirely of bytes that appear in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;
	const char *member;
	char ch;

	while ((ch = string[span]) != '\0') {
		/* look the current byte up in the set */
		member = charset;
		while (*member != '\0' && *member != ch)
			member++;

		/* not a member: the span ends here */
		if (*member == '\0')
			break;
		span++;
	}

	return (span);
}
612 
613 /*
614  * Unless mentioned otherwise, all of the routines below should be added to
615  * the Solaris DDI as necessary.  For now, only provide them to standalone.
616  */
617 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split a string into tokens separated by any of the bytes in 'sepset'.
 * The first call passes the string to be split; later calls pass NULL to
 * continue with the same string.  Each call returns the next token, or NULL
 * when none remain.  The string is modified (the separator that ends a
 * token is overwritten with a null) and the position is remembered in a
 * static variable shared by all callers.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*cursor, *token, *stop;

	/*
	 * Resume from the saved position unless a new string was supplied.
	 */
	cursor = (string != NULL) ? string : savept;
	if (cursor == NULL)
		return (NULL);

	/*
	 * Step over any leading separators; an empty remainder means that
	 * there are no more tokens.
	 */
	token = cursor + strspn(cursor, sepset);
	if (*token == '\0')
		return (NULL);

	/*
	 * Terminate the token at the next separator, if any, and record
	 * where the following call should pick up.
	 */
	stop = strpbrk(token, sepset);
	if (stop != NULL) {
		*stop = '\0';
		savept = stop + 1;
	} else {
		savept = NULL;
	}

	return (token);
}
650 
/*
 * Return the number of bytes in the string 's', not counting the
 * terminating null.
 *
 * The strlen() routine isn't shared with the kernel because it has its own
 * hand-tuned assembly version.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;

	return (end - s);
}
664 
665 #endif /* _BOOT || _KMDB */
666 
667 #ifdef _KERNEL
/*
 * Character classification helpers for the C identifier routines.  They
 * compare against explicit character ranges, and they evaluate their
 * argument more than once.
 */

#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 *
 * Returns 1 if 'id' is a valid identifier and 0 otherwise; the empty string
 * is not valid.
 */
int
strident_valid(const char *id)
{
	const char *p = id;

	/* The first character may not be a digit. */
	if (!(IS_ALPHA(*p) || *p == '_'))
		return (0);

	for (p++; *p != '\0'; p++) {
		if (IS_ALPHA(*p) || IS_DIGIT(*p) || *p == '_')
			continue;
		return (0);
	}

	return (1);
}
692 
/*
 * Convert a string into a valid C identifier by replacing invalid
 * characters with '_'.  Also makes sure the string is nul-terminated
 * and takes up at most n bytes.
 *
 * 's' is rewritten in place and 'n' must be greater than zero.  An empty
 * string is left as it is.
 */
void
strident_canon(char *s, size_t n)
{
	char *last;	/* final byte available in the n-byte buffer */
	char c;

	ASSERT(n > 0);

	last = s + n - 1;

	if (*s == '\0')
		return;

	/* A digit is not acceptable in the leading position. */
	if (!IS_ALPHA(*s) && *s != '_')
		*s = '_';

	while (s < last) {
		c = *(++s);
		if (c == '\0')
			break;
		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
			*s = '_';
	}

	/* Terminate at the end of the string or of the buffer. */
	*s = '\0';
}
718 
719 #endif	/* _KERNEL */
720