xref: /freebsd/lib/libfigpar/string_m.c (revision 4268f3b3)
1041394f3SDevin Teske /*-
2041394f3SDevin Teske  * Copyright (c) 2001-2014 Devin Teske <dteske@FreeBSD.org>
3041394f3SDevin Teske  * All rights reserved.
4041394f3SDevin Teske  *
5041394f3SDevin Teske  * Redistribution and use in source and binary forms, with or without
6041394f3SDevin Teske  * modification, are permitted provided that the following conditions
7041394f3SDevin Teske  * are met:
8041394f3SDevin Teske  * 1. Redistributions of source code must retain the above copyright
9041394f3SDevin Teske  *    notice, this list of conditions and the following disclaimer.
10041394f3SDevin Teske  * 2. Redistributions in binary form must reproduce the above copyright
11041394f3SDevin Teske  *    notice, this list of conditions and the following disclaimer in the
12041394f3SDevin Teske  *    documentation and/or other materials provided with the distribution.
13041394f3SDevin Teske  *
14041394f3SDevin Teske  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15041394f3SDevin Teske  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16041394f3SDevin Teske  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17041394f3SDevin Teske  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18041394f3SDevin Teske  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19041394f3SDevin Teske  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20041394f3SDevin Teske  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21041394f3SDevin Teske  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22041394f3SDevin Teske  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23041394f3SDevin Teske  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24041394f3SDevin Teske  * SUCH DAMAGE.
25041394f3SDevin Teske  */
26041394f3SDevin Teske 
27041394f3SDevin Teske #include <sys/cdefs.h>
28041394f3SDevin Teske __FBSDID("$FreeBSD$");
29041394f3SDevin Teske 
30041394f3SDevin Teske #include <sys/types.h>
31041394f3SDevin Teske 
32041394f3SDevin Teske #include <ctype.h>
33041394f3SDevin Teske #include <errno.h>
34041394f3SDevin Teske #include <stdio.h>
35041394f3SDevin Teske #include <stdlib.h>
36041394f3SDevin Teske #include <string.h>
37041394f3SDevin Teske 
38041394f3SDevin Teske #include "string_m.h"
39041394f3SDevin Teske 
/*
 * Counts the non-overlapping occurrences of `find' in `source', scanning from
 * left to right and resuming just past each match (so "aaa" contains "aa"
 * exactly once). The return value is the total count.
 *
 * Zero is returned when either argument is NULL or when `find' is the empty
 * string.
 *
 * An example use would be if you need to know how large a block of memory
 * needs to be for a replaceall() series.
 */
unsigned int
strcount(const char *source, const char *find)
{
	const char *p;
	size_t flen;
	unsigned int n = 0;

	/* Both parameters are required */
	if (source == NULL || find == NULL)
		return (0);

	/* An empty pattern matches nothing (and would never advance) */
	flen = strlen(find);
	if (flen == 0)
		return (0);

	/* Hop from match to match, skipping over the text just counted */
	for (p = strstr(source, find); p != NULL; p = strstr(p + flen, find))
		n++;

	return (n);
}
74041394f3SDevin Teske 
/*
 * Replaces all occurrences of `find' in `source' with `replace'. Matches are
 * located left to right and do not overlap; text that was inserted as a
 * replacement is never rescanned. A NULL `replace' is treated as the empty
 * string (every occurrence of `find' is deleted).
 *
 * You should not pass a string constant as the first parameter, it needs to be
 * a pointer to a writable block of memory. The block of memory that source
 * points to should be large enough to hold the result. If the length of the
 * replacement string is greater than the length of the find string, the result
 * will be larger than the original source string. To allocate enough space for
 * the result, use the function strcount() declared above to determine the
 * number of occurrences and how much larger the block size needs to be.
 *
 * If source is not large enough, the write runs past the end of the buffer
 * (undefined behavior; the application will likely crash).
 *
 * The return value is the length (in bytes) of the resulting string. Zero is
 * returned if `source' is NULL; if `find' is NULL or empty, `source' is left
 * untouched and its length is returned.
 *
 * When the temporary copy needed for a growing replacement cannot be
 * allocated, -1 is returned, `source' is left unmodified and errno is left as
 * set by the allocator. errno is reset to zero on entry.
 */
int
replaceall(char *source, const char *find, const char *replace)
{
	char *dst;	/* write cursor, always inside `source' */
	char *match;	/* start of the next occurrence of `find' */
	char *src;	/* read cursor, inside `temp' */
	char *temp;	/* text being read: `source' itself or a copy */
	size_t flen;
	size_t rlen;
	size_t slen;
	size_t span;

	errno = 0; /* reset global error number */

	/* Check that we have non-null parameters */
	if (source == NULL)
		return (0);
	if (find == NULL)
		return ((int)strlen(source));

	/* Cache the length of the strings */
	slen = strlen(source);
	flen = strlen(find);
	rlen = replace ? strlen(replace) : 0;

	/* Cases where no replacements need to be made */
	if (slen == 0 || flen == 0 || slen < flen)
		return ((int)slen);

	/*
	 * When the text can grow, the write cursor would overtake the read
	 * cursor, so read from a private copy instead. Otherwise the write
	 * cursor never passes the read cursor and we can work in place.
	 */
	if (rlen > flen) {
		temp = malloc(slen + 1);
		if (temp == NULL) /* could not allocate memory */
			return (-1);
		memcpy(temp, source, slen + 1);
	} else
		temp = source;

	/* Reconstruct the string with the replacements */
	dst = source;
	src = temp;
	while ((match = strstr(src, find)) != NULL) {
		/* Carry over the untouched text in front of the match */
		span = (size_t)(match - src);
		memmove(dst, src, span); /* may overlap when in place */
		dst += span;

		/* Emit the replacement (nothing at all when deleting) */
		if (rlen > 0) {
			memcpy(dst, replace, rlen);
			dst += rlen;
		}

		/* Resume reading just past the occurrence */
		src = match + flen;
	}

	/* Carry over the tail, including the terminating NUL */
	span = strlen(src);
	memmove(dst, src, span + 1);
	dst += span;

	/* Free the temporary allocated memory */
	if (temp != source)
		free(temp);

	/* Return the length of the completed string */
	return ((int)(dst - source));
}
151041394f3SDevin Teske 
/*
 * Expands escape sequences in a buffer pointed to by `source'. This function
 * steps through each character, and converts escape sequences such as "\n",
 * "\r", "\t" and others into their respective meanings. The conversion is
 * done in place; a NULL `source' is ignored.
 *
 * You should not pass a string constant or literal to this function or the
 * program will likely segmentation fault when it tries to modify the data.
 *
 * The string length will either shorten or stay the same depending on whether
 * any escape sequences were converted but the amount of memory allocated does
 * not change.
 *
 * Interpreted sequences are:
 *
 * 	\0NNN	character with octal value NNN (0 to 3 digits)
 * 	\N	character with octal value N (0 thru 7)
 * 	\a	alert (BEL)
 * 	\b	backspace
 * 	\f	form feed
 * 	\n	new line
 * 	\r	carriage return
 * 	\t	horizontal tab
 * 	\v	vertical tab
 * 	\xNN	byte with hexadecimal value NN (1 to 2 digits)
 *
 * All other sequences are unescaped (ie. '\"' and '\#'). A "\x" that is not
 * followed by a hexadecimal digit yields the character after the `x', and a
 * backslash that ends the string is dropped. Numeric values are truncated to
 * a single byte. An escape that evaluates to zero (such as "\0" or "\x00")
 * stores a NUL byte, which ends the string as seen by C string functions.
 */
void
strexpand(char *source)
{
	char *chr;	/* read cursor */
	char *pos;	/* write cursor, never ahead of the read cursor */
	char d[4];	/* digits of a numeric escape, NUL-terminated */
	int i;

	if (source == NULL)
		return;

	/* Initialize position elements */
	pos = chr = source;

	/*
	 * Loop until the read cursor hits the end of the string. The write
	 * cursor lags behind once an escape has been collapsed, so it cannot
	 * be used to detect the end without reading past the terminator.
	 */
	while (*chr != '\0') {
		if (*chr != '\\') {
			*pos++ = *chr++; /* plain character, copy as-is */
			continue;
		}

		/* A backslash that ends the string has nothing to escape */
		if (*++chr == '\0')
			break;

		/* Replace the backslash with the correct character */
		switch (*chr) {
		case 'a': *pos = '\a'; break; /* bell/alert (BEL) */
		case 'b': *pos = '\b'; break; /* backspace */
		case 'f': *pos = '\f'; break; /* form feed */
		case 'n': *pos = '\n'; break; /* new line */
		case 'r': *pos = '\r'; break; /* carriage return */
		case 't': *pos = '\t'; break; /* horizontal tab */
		case 'v': *pos = '\v'; break; /* vertical tab */
		case 'x': /* hex value (1 to 2 digits)(\xNN) */
			memset(d, '\0', sizeof(d));

			/* collect up to two hex digits */
			for (i = 0; i < 2 &&
			    isxdigit((unsigned char)chr[1]); i++)
				d[i] = *++chr;

			if (i > 0) {
				/* assign the converted value */
				*pos = (char)(unsigned char)strtoul(d, NULL,
				    16);
				break;
			}

			/* "\x" at the very end of the string: emit nothing */
			if (chr[1] == '\0') {
				chr++;
				continue;
			}

			/* no digits: fall back to the character after `x' */
			*pos = *++chr;
			break;
		case '0': /* octal value (0 to 3 digits)(\0NNN) */
			memset(d, '\0', sizeof(d));

			/* collect up to three octal digits */
			for (i = 0; i < 3 &&
			    chr[1] >= '0' && chr[1] <= '7'; i++)
				d[i] = *++chr;

			/* no digits at all converts to zero (a NUL byte) */
			*pos = (char)(unsigned char)strtoul(d, NULL, 8);
			break;
		default: /* single octal (\1..7) or unknown sequence */
			if (*chr >= '0' && *chr <= '7')
				*pos = (char)(*chr - '0');
			else
				*pos = *chr;
			break;
		}

		/* Increment to next offset, possible next escape sequence */
		pos++;
		chr++;
	}

	/* Terminate the (possibly shortened) string */
	*pos = '\0';
}
254041394f3SDevin Teske 
/*
 * Expand only the escaped newlines in a buffer pointed to by `source'. This
 * function steps through each character, and converts the "\n" sequence into
 * a literal newline and the "\\n" sequence into "\n".
 *
 * More generally, for a run of backslashes immediately followed by `n', the
 * last backslash of the run is consumed: an odd run ends in a literal
 * newline, an even run ends in a plain `n'. Backslashes in front of any other
 * character are left alone. A NULL `source' is ignored.
 *
 * You should not pass a string constant or literal to this function or the
 * program will likely segmentation fault when it tries to modify the data.
 *
 * The string length will either shorten or stay the same depending on whether
 * any escaped newlines were converted but the amount of memory allocated does
 * not change.
 */
void
strexpandnl(char *source)
{
	size_t backslash = 0;	/* length of the current backslash run */
	char *cp1;		/* write cursor */
	char *cp2;		/* read cursor */

	if (source == NULL)
		return;

	/* Collapse each escaped `n' in place */
	cp1 = cp2 = source;
	while (*cp2 != '\0') {
		*cp1 = *cp2;
		if (*cp2 == '\\')
			backslash++;
		else if (*cp2 != 'n')
			backslash = 0;
		else if (backslash > 0) {
			/*
			 * Step back onto the last backslash of the run and
			 * overwrite it; the `n' just copied is overwritten by
			 * the next character (or the terminator).
			 */
			*(--cp1) = (backslash & 1) == 1 ? '\n' : 'n';
			backslash = 0;
		}
		cp1++;
		cp2++;
	}
	*cp1 = '\0';
}
290041394f3SDevin Teske 
/*
 * Convert a string to lower case, in place, one byte at a time using
 * tolower(3) (so the result depends on the current locale). You should not
 * pass a string constant to this function. Only pass pointers to writable
 * memory with null terminated string data. A NULL `source' is ignored.
 */
void
strtolower(char *source)
{
	char *p;

	if (source == NULL)
		return;

	/*
	 * tolower() is only defined for EOF and values representable as
	 * unsigned char, so bytes with the high bit set must not be passed
	 * as (possibly negative) plain char.
	 */
	for (p = source; *p != '\0'; p++)
		*p = (char)tolower((unsigned char)*p);
}
309