xref: /illumos-gate/usr/src/common/util/string.c (revision 1979231e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Implementations of the functions described in vsnprintf(3C) and string(3C),
30  * for use by the kernel, the standalone, and kmdb.  Unless otherwise specified,
31  * these functions match the section 3C manpages.
32  */
33 
34 #include <sys/types.h>
35 #include <sys/varargs.h>
36 #if defined(_BOOT) || defined(_KMDB)
37 #include <string.h>
38 #else
39 #include <sys/systm.h>
40 #endif
41 #ifdef _KERNEL
42 #include <sys/debug.h>
43 #endif	/* _KERNEL */
44 
45 /*
46  * kmdb has its own *printf routines, and thus doesn't need these versions too.
47  */
48 #if !defined(_KMDB)
49 
/*
 * Append one character to the output being assembled by vsnprintf().
 *
 * The macro relies on three names in the caller's scope: 'buf' (start of
 * the output buffer), 'bufp' (current output position) and 'buflen' (size
 * of the buffer).  'bufp' is advanced unconditionally, so (bufp - buf)
 * keeps counting characters even after the buffer is full; the store
 * itself only happens while the position is still inside the buffer.
 * The argument is evaluated at most once (not at all once the buffer is
 * full).
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement and cannot capture an 'else' that follows it.
 */
#define	ADDCHAR(c)	\
	do { if (bufp++ - buf < buflen) bufp[-1] = (c); } while (0)
51 
/*
 * Given a buffer 'buf' of size 'buflen', render as much of the string
 * described by <fmt, aargs> as possible.  The string will always be
 * null-terminated (when buflen is non-zero), so the maximum string length
 * is 'buflen - 1'.  Returns the number of bytes that would be necessary to
 * render the entire string, not including null terminator (just like
 * vsnprintf(3C)).  To determine buffer size in advance, use
 * vsnprintf(NULL, 0, fmt, args) + 1.
 *
 * Supported directives:
 *	flags		'-' (left-align, honoured for %s only) and '0'
 *	width/prec	decimal digits or '*' (taken from the argument list)
 *	length		h, hh, l, ll
 *	conversions	d u x p o b c s %
 * An upper-case conversion letter is folded to lower case and selects
 * upper-case hex digits.  Unrecognized conversion letters are consumed
 * without producing output.
 *
 * %b takes an unsigned int followed by a 'char *' description: the first
 * byte of the description is the numeric base used to print the value, and
 * the rest is a sequence of <bit number (1-based), name> groups, where the
 * name is made of bytes >= 32.  The names of the bits that are set are
 * printed as "<name,name,...>" after the number.
 *
 * There is no support for floating point, and the C locale is assumed.
 */
size_t
vsnprintf(char *buf, size_t buflen, const char *fmt, va_list aargs)
{
	uint64_t ul, tmp;
	char *bufp = buf;	/* current buffer pointer */
	int pad, width, base, sign, c, num;
	int prec, h_count, l_count, dot_count;
	int pad_count, transfer_count, left_align;
	char *digits, *sp, *bs;
	char numbuf[65];	/* sufficient for a 64-bit binary value */
	va_list args;

	/*
	 * Make a copy so that all our callers don't have to make a copy
	 */
	va_copy(args, aargs);

	/* a size that is negative when viewed as signed means "no room" */
	if ((ssize_t)buflen < 0)
		buflen = 0;

	while ((c = *fmt++) != '\0') {
		if (c != '%') {
			ADDCHAR(c);
			continue;
		}

		/* start of a directive: reset all per-directive state */
		width = prec = 0;
		left_align = base = sign = 0;
		h_count = l_count = dot_count = 0;
		pad = ' ';
		digits = "0123456789abcdef";
next_fmt:
		/* a format that ends in the middle of a directive just stops */
		if ((c = *fmt++) == '\0')
			break;

		if (c >= 'A' && c <= 'Z') {
			c += 'a' - 'A';
			digits = "0123456789ABCDEF";
		}

		switch (c) {
		case '-':
			left_align++;
			goto next_fmt;
		case '0':
			/* a leading zero before any '.' selects zero padding */
			if (dot_count == 0)
				pad = '0';
			/*FALLTHROUGH*/
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			num = 0;
			for (;;) {
				num = 10 * num + c - '0';
				c = *fmt;
				if (c < '0' || c > '9')
					break;
				else
					fmt++;
			}
			/* digits after a '.' are the precision, else the width */
			if (dot_count > 0)
				prec = num;
			else
				width = num;

			goto next_fmt;
		case '.':
			dot_count++;
			goto next_fmt;
		case '*':
			if (dot_count > 0)
				prec = (int)va_arg(args, int);
			else
				width = (int)va_arg(args, int);
			goto next_fmt;
		case 'l':
			l_count++;
			goto next_fmt;
		case 'h':
			h_count++;
			goto next_fmt;
		case 'd':
			sign = 1;
			/*FALLTHROUGH*/
		case 'u':
			base = 10;
			break;
		case 'p':
			/* pointers are fetched as unsigned long, printed in hex */
			l_count = 1;
			/*FALLTHROUGH*/
		case 'x':
			base = 16;
			break;
		case 'o':
			base = 8;
			break;
		case 'b':
			/*
			 * The value is always fetched as an unsigned int.
			 * base is only a non-zero placeholder here; the real
			 * base is read from the description string below.
			 */
			l_count = 0;
			base = 1;
			break;
		case 'c':
			c = (char)va_arg(args, int);
			ADDCHAR(c);
			break;
		case 's':
			sp = va_arg(args, char *);
			if (sp == NULL) {
				sp = "<null string>";
				/* avoid truncation */
				prec = strlen(sp);
			}
			/*
			 * Handle simple case specially to avoid
			 * performance hit of strlen()
			 */
			if (prec == 0 && width == 0) {
				while ((c = *sp++) != 0)
					ADDCHAR(c);
				break;
			}
			if (prec > 0) {
				/*
				 * With a precision the argument need not be
				 * null-terminated, so never look at more than
				 * 'prec' bytes of it.
				 */
				transfer_count = 0;
				while (transfer_count < prec &&
				    sp[transfer_count] != '\0')
					transfer_count++;
				/* widen field if too narrow */
				if (prec > width)
					width = prec;
			} else {
				transfer_count = strlen(sp);
			}
			if (width > transfer_count)
				pad_count = width - transfer_count;
			else
				pad_count = 0;
			while ((!left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			/* ADDCHAR() evaluates arg at most once */
			while (transfer_count-- > 0)
				ADDCHAR(*sp++);
			while ((left_align) && (pad_count-- > 0))
				ADDCHAR(' ');
			break;
		case '%':
			ADDCHAR('%');
			break;
		}

		/* everything except the numeric conversions is finished */
		if (base == 0)
			continue;

		/* fetch the value with the type selected by the h/l modifiers */
		if (h_count == 0 && l_count == 0) {
			if (sign)
				ul = (int64_t)va_arg(args, int);
			else
				ul = (int64_t)va_arg(args, unsigned int);
		} else if (l_count > 1) {
			if (sign)
				ul = (int64_t)va_arg(args, int64_t);
			else
				ul = (int64_t)va_arg(args, uint64_t);
		} else if (l_count > 0) {
			if (sign)
				ul = (int64_t)va_arg(args, long);
			else
				ul = (int64_t)va_arg(args, unsigned long);
		} else if (h_count > 1) {
			if (sign)
				ul = (int64_t)((char)va_arg(args, int));
			else
				ul = (int64_t)((unsigned char)va_arg(args,
				    int));
		} else if (h_count > 0) {
			if (sign)
				ul = (int64_t)((short)va_arg(args, int));
			else
				ul = (int64_t)((unsigned short)va_arg(args,
				    int));
		}

		/* from here on 'sign' means "a minus sign must be printed" */
		if (sign && (int64_t)ul < 0)
			ul = -ul;
		else
			sign = 0;

		if (c == 'b') {
			bs = va_arg(args, char *);
			base = *bs++;
		}

		/* avoid repeated division if width is 0 */
		if (width > 0) {
			/* subtract the number of digits from the field width */
			tmp = ul;
			do {
				width--;
			} while ((tmp /= base) != 0);
		}

		/* the sign precedes zero padding but follows blank padding */
		if (sign && pad == '0') {
			ADDCHAR('-');
		}
		while (width-- > sign) {
			ADDCHAR(pad);
		}
		if (sign && pad == ' ') {
			ADDCHAR('-');
		}

		/* generate the digits least-significant first ... */
		sp = numbuf;
		tmp = ul;
		do {
			*sp++ = digits[tmp % base];
		} while ((tmp /= base) != 0);

		/* ... and emit them in the opposite order */
		while (sp > numbuf) {
			sp--;
			ADDCHAR(*sp);
		}

		if (c == 'b' && ul != 0) {
			int any = 0;
			c = *bs++;
			while (c != 0) {
				if (ul & (1 << (c - 1))) {
					/* bit is set: print its name */
					if (any++ == 0) {
						ADDCHAR('<');
					}
					while ((c = *bs++) >= 32) {
						ADDCHAR(c);
					}
					ADDCHAR(',');
				} else {
					/* bit is clear: skip over its name */
					while ((c = *bs++) >= 32)
						continue;
				}
			}
			if (any) {
				/* replace the trailing ',' with '>' */
				bufp--;
				ADDCHAR('>');
			}
		}
	}

	/*
	 * Terminate the output: right after the last character if it fit,
	 * otherwise in the last byte of the buffer (if there is a buffer).
	 */
	if (bufp - buf < buflen)
		bufp[0] = '\0';
	else if (buflen != 0)
		buf[buflen - 1] = '\0';

	va_end(args);

	return (bufp - buf);
}
311 
/*
 * Variadic front end to vsnprintf(): formats into 'buf' (at most 'buflen'
 * bytes, terminator included) and returns whatever vsnprintf() returns,
 * i.e. the length the complete string would have.
 *
 * The format string is the third argument, which is what the lint
 * directive below has to name.
 */
/*PRINTFLIKE3*/
size_t
snprintf(char *buf, size_t buflen, const char *fmt, ...)
{
	va_list args;
	size_t len;

	va_start(args, fmt);
	len = vsnprintf(buf, buflen, fmt, args);
	va_end(args);

	return (len);
}
324 
325 #if defined(_BOOT)
/*
 * sprintf() and vsprintf() are private to the standalone environment:
 * the kernel cannot share them because the DDI requires its versions to
 * return the buffer rather than the length.
 *
 * Both format into 'buf' with no effective size limit (INT_MAX) and
 * return the length of the resulting string.
 */
int
vsprintf(char *buf, const char *fmt, va_list args)
{
	(void) vsnprintf(buf, INT_MAX, fmt, args);
	return (strlen(buf));
}

/*PRINTFLIKE2*/
int
sprintf(char *buf, const char *fmt, ...)
{
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsprintf(buf, fmt, ap);
	va_end(ap);

	return (len);
}
349 #endif
350 
351 #endif /* !_KMDB */
352 
/*
 * Append a copy of the string s2, terminating null included, to the end
 * of the string s1.  Returns s1.
 */
char *
strcat(char *s1, const char *s2)
{
	char *tail = s1;

	/* find the terminator of the destination string */
	while (*tail != '\0')
		tail++;

	/* copy the source over it, up to and including its terminator */
	do {
		*tail++ = *s2;
	} while (*s2++ != '\0');

	return (s1);
}
365 
/*
 * Return a pointer to the first occurrence of (char)c in the string sp,
 * or NULL if there is none.  The terminating null is part of the string,
 * so searching for '\0' returns a pointer to the terminator.
 */
char *
strchr(const char *sp, int c)
{
	const char target = (char)c;

	for (;; sp++) {
		if (*sp == target)
			return ((char *)sp);
		if (*sp == '\0')
			return (NULL);
	}
}
375 
/*
 * Compare two strings byte by byte.  Returns 0 when they are equal;
 * otherwise returns the difference between the first pair of bytes that
 * differ, both taken as unsigned char.
 */
int
strcmp(const char *s1, const char *s2)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	for (; *a == *b; a++, b++) {
		if (*a == '\0')
			return (0);
	}
	return (*a - *b);
}
384 
/*
 * Compare at most n bytes of two strings.  Returns 0 when the compared
 * portions are equal (or when both arguments are the same pointer);
 * otherwise returns the difference between the first pair of bytes that
 * differ, both taken as unsigned char.
 */
int
strncmp(const char *s1, const char *s2, size_t n)
{
	const unsigned char *a = (const unsigned char *)s1;
	const unsigned char *b = (const unsigned char *)s2;

	/* a string always equals itself; no need to scan it */
	if (s1 == s2)
		return (0);

	for (; n != 0; n--, a++, b++) {
		if (*a != *b)
			return (*a - *b);
		if (*a == '\0')
			break;
	}
	return (0);
}
396 
/*
 * Case-folding table used by strcasecmp() and strncasecmp(), indexed by
 * byte value (0 through 0377).  Every entry maps to itself except for
 * 0101 through 0132 ('A' through 'Z'), which map to 0141 through 0172
 * ('a' through 'z').  Each row below holds eight consecutive entries.
 */
static const char charmap[] = {
	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
	/* '@', then 'A'-'Z' folded to 'a'-'z' */
	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
	/* '`', then 'a'-'z' unchanged */
	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
	/* bytes with the high bit set map to themselves */
	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
};
431 
432 int
433 strcasecmp(const char *s1, const char *s2)
434 {
435 	const unsigned char *cm = (const unsigned char *)charmap;
436 	const unsigned char *us1 = (const unsigned char *)s1;
437 	const unsigned char *us2 = (const unsigned char *)s2;
438 
439 	while (cm[*us1] == cm[*us2++])
440 		if (*us1++ == '\0')
441 			return (0);
442 	return (cm[*us1] - cm[*(us2 - 1)]);
443 }
444 
445 int
446 strncasecmp(const char *s1, const char *s2, size_t n)
447 {
448 	const unsigned char *cm = (const unsigned char *)charmap;
449 	const unsigned char *us1 = (const unsigned char *)s1;
450 	const unsigned char *us2 = (const unsigned char *)s2;
451 
452 	while (n != 0 && cm[*us1] == cm[*us2++]) {
453 		if (*us1++ == '\0')
454 			return (0);
455 		n--;
456 	}
457 	return (n == 0 ? 0 : cm[*us1] - cm[*(us2 - 1)]);
458 }
459 
/*
 * Copy the string s2, terminating null included, into s1.  Returns s1.
 */
char *
strcpy(char *s1, const char *s2)
{
	char *out = s1;
	char ch;

	do {
		ch = *s2++;
		*out++ = ch;
	} while (ch != '\0');

	return (s1);
}
469 
/*
 * Copy at most n bytes of the string s2 into s1.  If s2 is shorter than
 * n bytes, the remainder of the n bytes is filled with nulls; if it is
 * n bytes or longer, the result is not null-terminated.  Returns s1.
 */
char *
strncpy(char *s1, const char *s2, size_t n)
{
	size_t i = 0;

	/* copy the characters of s2 that fit */
	while (i < n && s2[i] != '\0') {
		s1[i] = s2[i];
		i++;
	}

	/* null-fill whatever is left of the n bytes */
	while (i < n)
		s1[i++] = '\0';

	return (s1);
}
483 
/*
 * Return a pointer to the last occurrence of (char)c in the string sp,
 * or NULL if there is none.  The terminating null is part of the string,
 * so searching for '\0' returns a pointer to the terminator.
 */
char *
strrchr(const char *sp, int c)
{
	const char target = (char)c;
	const char *last = NULL;

	for (;; sp++) {
		if (*sp == target)
			last = sp;
		if (*sp == '\0')
			break;
	}
	return ((char *)last);
}
496 
/*
 * Find the first occurrence of the string as2 inside the string as1.
 * Returns a pointer to where it starts, or NULL if it does not occur.
 * When as2 is NULL or empty, as1 itself is returned.
 */
char *
strstr(const char *as1, const char *as2)
{
	const char *start, *h, *n;

	if (as2 == NULL || *as2 == '\0')
		return ((char *)as1);

	for (start = as1; *start != '\0'; start++) {
		/* only a matching first character is worth a closer look */
		if (*start != *as2)
			continue;

		h = start + 1;
		n = as2 + 1;
		while (*n != '\0' && *n == *h) {
			h++;
			n++;
		}
		/* reaching the end of as2 means all of it matched */
		if (*n == '\0')
			return ((char *)start);
	}

	return (NULL);
}
525 
/*
 * Return a pointer to the first character of 'string' that also appears
 * in 'brkset', or NULL if no character of 'string' does.
 */
char *
strpbrk(const char *string, const char *brkset)
{
	const char *b;

	for (;;) {
		/* look the current character up in the break set */
		b = brkset;
		while (*b != '\0' && *b != *string)
			b++;
		if (*b != '\0')
			return ((char *)string);

		if (*string == '\0')
			return (NULL);
		string++;
	}
}
540 
/*
 * Append at most n characters of s2 to the end of the string s1 and
 * null-terminate the result.  Copying stops early at a null in s2.
 * Returns s1.
 *
 * s2 is examined only while fewer than n characters have been copied, so
 * a source array of exactly n bytes with no terminator is never read
 * past its end.
 */
char *
strncat(char *s1, const char *s2, size_t n)
{
	char *tail = s1;

	/* find the terminator of the destination string */
	while (*tail != '\0')
		tail++;

	/* the count is checked first so that s2[n] is never touched */
	while (n != 0 && *s2 != '\0') {
		*tail++ = *s2++;
		n--;
	}
	*tail = '\0';

	return (s1);
}
558 
/*
 * The standalone and kmdb environments have no bcopy(); map it onto
 * memcpy(), swapping the source and destination arguments.
 */
#if defined(_BOOT) || defined(_KMDB)
#define	bcopy(src, dst, n)	(void) memcpy((dst), (src), (n))
#endif

/*
 * Append src to the string in dst, where dst is a buffer of dstsize
 * bytes.  As much of src as fits is copied and the result is always
 * null-terminated, unless dst holds no terminator within its first
 * dstsize bytes, in which case dst is left untouched.
 *
 * Returns the length of dst as found (never counted beyond dstsize) plus
 * the length of src.
 */
size_t
strlcat(char *dst, const char *src, size_t dstsize)
{
	size_t srclen = strlen(src);
	size_t dstlen = 0;
	size_t room, ncopy;

	/* measure dst without ever looking beyond dstsize bytes */
	while (dstlen < dstsize && dst[dstlen] != '\0')
		dstlen++;

	/* no terminator inside the buffer: there is nowhere to append */
	if (dstlen == dstsize)
		return (dstlen + srclen);

	/* bytes available for characters, one being kept for the null */
	room = dstsize - dstlen - 1;
	ncopy = (srclen > room) ? room : srclen;

	bcopy(src, dst + dstlen, ncopy);
	dst[dstlen + ncopy] = '\0';

	return (dstlen + srclen);
}
583 
/*
 * Copy src into dst, a buffer of len bytes.  At most len - 1 characters
 * are copied and the result is always null-terminated; when len is 0
 * nothing is written.  Returns the length of src, so a return value of
 * len or more means the copy was truncated.
 */
size_t
strlcpy(char *dst, const char *src, size_t len)
{
	size_t srclen = strlen(src);
	size_t ncopy;

	if (len == 0)
		return (srclen);

	/* keep one byte of the buffer for the terminator */
	ncopy = (srclen < len) ? srclen : len - 1;

	bcopy(src, dst, ncopy);
	dst[ncopy] = '\0';

	return (srclen);
}
601 
/*
 * Return the length of the leading part of 'string' that is made up
 * entirely of characters found in 'charset'.
 */
size_t
strspn(const char *string, const char *charset)
{
	size_t span = 0;

	while (string[span] != '\0') {
		const char *m = charset;

		while (*m != '\0' && *m != string[span])
			m++;
		if (*m == '\0')
			break;		/* not in the set: the span ends here */
		span++;
	}

	return (span);
}
616 
617 /*
618  * Unless mentioned otherwise, all of the routines below should be added to
619  * the Solaris DDI as necessary.  For now, only provide them to standalone.
620  */
621 #if defined(_BOOT) || defined(_KMDB)
/*
 * Split a string into tokens separated by any of the characters in
 * 'sepset'.  The first call passes the string to split; later calls pass
 * NULL to continue with the same string.  Each call returns a pointer to
 * the next token, or NULL when no tokens remain.
 *
 * The string is modified in place (the separator ending each token is
 * overwritten with a null) and the position is kept in a static
 * variable between calls.
 */
char *
strtok(char *string, const char *sepset)
{
	static char	*savept;
	char		*start, *tok, *end;

	/*
	 * Continue from the saved position unless a new string is given.
	 */
	start = (string != NULL) ? string : savept;
	if (start == NULL)
		return (NULL);

	/*
	 * Step over any separators in front of the token; if that takes
	 * us to the end of the string there is nothing left to return.
	 */
	tok = start + strspn(start, sepset);
	if (*tok == '\0')
		return (NULL);

	/*
	 * Terminate the token at the next separator and remember where
	 * the following call has to resume.
	 */
	end = strpbrk(tok, sepset);
	if (end != NULL) {
		*end = '\0';
		savept = end + 1;
	} else {
		savept = NULL;
	}

	return (tok);
}
654 
/*
 * Return the number of bytes in the string s, not counting the
 * terminating null.
 *
 * The kernel does not use this version of strlen(): it has its own
 * hand-tuned assembly implementation.
 */
size_t
strlen(const char *s)
{
	const char *end = s;

	while (*end != '\0')
		end++;

	return (end - s);
}
668 
669 #endif /* _BOOT || _KMDB */
670 
671 #ifdef _KERNEL
/*
 * Character classification for C identifiers (ASCII ranges only).
 */
#define	IS_DIGIT(c)	((c) >= '0' && (c) <= '9')

#define	IS_ALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check for a valid C identifier:
 *	a letter or underscore, followed by
 *	zero or more letters, digits and underscores.
 *
 * Returns 1 if 'id' is such an identifier and 0 otherwise; the empty
 * string is not valid.
 */
int
strident_valid(const char *id)
{
	const char *p = id;
	int c = *p;

	/* the first character may not be a digit */
	if (!IS_ALPHA(c) && c != '_')
		return (0);

	for (p++; (c = *p) != 0; p++) {
		if (!IS_ALPHA(c) && !IS_DIGIT(c) && c != '_')
			return (0);
	}

	return (1);
}
696 
/*
 * Convert a string into a valid C identifier by replacing invalid
 * characters with '_'.  Also makes sure the string is nul-terminated
 * and takes up at most n bytes.
 *
 * A first character that is not a letter or underscore (a digit, for
 * instance) is replaced as well.  An empty string is left as it is.
 * n must be greater than zero.
 */
void
strident_canon(char *s, size_t n)
{
	char *last = s + n - 1;		/* last byte the result may use */
	char ch;

	ASSERT(n > 0);

	ch = *s;
	if (ch == 0)
		return;

	if (!IS_ALPHA(ch) && ch != '_')
		*s = '_';

	while (s < last) {
		ch = *++s;
		if (ch == 0)
			break;
		if (!IS_ALPHA(ch) && !IS_DIGIT(ch) && ch != '_')
			*s = '_';
	}

	/* s is at the existing terminator or at the n-byte limit */
	*s = 0;
}
722 
723 #endif	/* _KERNEL */
724