1 /*
2  * Copyright (c) 2008-2012 Hypertriton, Inc. <http://hypertriton.com/>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
18  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
23  * USE OF THIS SOFTWARE EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 /*-
26  * Copyright (c) 1990, 1993
27  *	The Regents of the University of California.  All rights reserved.
28  *
29  * This code is derived from software contributed to Berkeley by
30  * Chris Torek.
31  *
32  * Redistribution and use in source and binary forms, with or without
33  * modification, are permitted provided that the following conditions
34  * are met:
35  * 1. Redistributions of source code must retain the above copyright
36  *    notice, this list of conditions and the following disclaimer.
37  * 2. Redistributions in binary form must reproduce the above copyright
38  *    notice, this list of conditions and the following disclaimer in the
39  *    documentation and/or other materials provided with the distribution.
40  * 3. Neither the name of the University nor the names of its contributors
41  *    may be used to endorse or promote products derived from this software
42  *    without specific prior written permission.
43  *
44  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
45  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
46  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
47  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
48  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
49  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
50  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
51  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
52  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
53  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
54  * SUCH DAMAGE.
55  */
56 
57 #ifndef	_AGAR_CORE_STRING_H_
58 #define	_AGAR_CORE_STRING_H_
59 
/*
 * Compile-time limits used by the AG_Printf() family.
 */
#define AG_STRING_BUFFERS_MAX	8	/* For AG_Printf() */
#define AG_STRING_POINTERS_MAX	32	/* Slots in AG_FmtString p[] and mu[] */
62 
63 #include <agar/core/begin.h>
64 
65 typedef struct ag_fmt_string {
66 	char *s;				/* Format string */
67 	void *p[AG_STRING_POINTERS_MAX];	/* Variable references */
68 	AG_Mutex *mu[AG_STRING_POINTERS_MAX];	/* Protecting variables */
69 	Uint n;
70 	int curArg;				/* For internal parser use */
71 } AG_FmtString;
72 
/*
 * Extended format specifier for polled labels.
 *
 * An AG_FmtStringExtFn receives the format string being processed plus a
 * character buffer and a size, and returns a size_t. Its signature matches
 * that of AG_ProcessFmtString().
 * NOTE(review): the exact meaning of the return value (bytes written vs.
 * bytes required) is not visible from this header -- confirm in string.c.
 */
typedef size_t (*AG_FmtStringExtFn)(struct ag_fmt_string *, char *, size_t);

struct ag_fmt_string_ext {
	char *fmt;		/* Specifier text */
	size_t fmtLen;		/* NOTE(review): presumably the length of
				   fmt -- confirm */
	AG_FmtStringExtFn fn;	/* Handler for this specifier */
};
typedef struct ag_fmt_string_ext AG_FmtStringExt;
80 
/*
 * Yield the next variable reference (a void *) of the AG_FmtString pointed
 * to by `fs', and post-increment its curArg cursor.
 *
 * Every use of the argument is parenthesized so that an expression such as
 * `base + 1' expands correctly. The argument is still evaluated twice, so
 * it must be free of side effects. No bounds check is made against the
 * size of p[].
 */
#define AG_FMTSTRING_ARG(fs) ((fs)->p[(fs)->curArg++])

/*
 * NOTE(review): judging by the names, the initial size and the growth step
 * of a formatting buffer; the code using them is not in this header.
 */
#define AG_FMTSTRING_BUFFER_INIT 128
#define AG_FMTSTRING_BUFFER_GROW 128
84 
85 __BEGIN_DECLS
86 extern const unsigned char agStrcasecmpMapASCII[];
87 
88 char         *AG_Printf(const char *, ...);
89 char         *AG_PrintfN(Uint, const char *, ...);
90 AG_FmtString *AG_PrintfP(const char *, ...);
91 void          AG_RegisterFmtStringExt(const char *, AG_FmtStringExtFn);
92 void          AG_UnregisterFmtStringExt(const char *);
93 size_t        AG_ProcessFmtString(AG_FmtString *, char *, size_t);
94 void          AG_FreeFmtString(AG_FmtString *);
95 
96 char  *AG_Strsep(char **, const char *);
97 char  *AG_Strdup(const char *);
98 char  *AG_TryStrdup(const char *);
99 size_t AG_Strlcpy(char *, const char *, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
100 size_t AG_Strlcat(char *, const char *, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
101 size_t AG_StrlcpyInt(char *, int, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
102 size_t AG_StrlcatInt(char *, int, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
103 size_t AG_StrlcpyUint(char *, Uint, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
104 size_t AG_StrlcatUint(char *, Uint, size_t) BOUNDED_ATTRIBUTE(__string__,1,3);
105 
106 const char *AG_Strcasestr(const char *, const char *);
107 void        AG_StrReverse(char *);
108 
109 Uint32	*AG_ImportUnicode(const char *, const char *, size_t *, size_t *);
110 int      AG_ExportUnicode(const char *, char *, const Uint32 *, size_t)
111 	     BOUNDED_ATTRIBUTE(__string__, 2, 4);
112 
113 int    AG_InitStringSubsystem(void);
114 void   AG_DestroyStringSubsystem(void);
115 
116 /*
117  * Return the length of a UCS-4 string in characters, without the
118  * terminating NUL.
119  */
120 static __inline__ size_t
AG_LengthUCS4(const Uint32 * ucs)121 AG_LengthUCS4(const Uint32 *ucs)
122 {
123 	size_t len;
124 
125 	for (len = 0; *ucs != '\0'; ucs++) {
126 		len++;
127 	}
128 	return (len);
129 }
130 
131 /*
132  * Return the number of bytes that would be needed to encode the given
133  * UCS-4 character in UTF-8.
134  */
135 static __inline__ int
AG_CharLengthUTF8FromUCS4(Uint32 ch)136 AG_CharLengthUTF8FromUCS4(Uint32 ch)
137 {
138 	if      (ch <  0x80)		{ return (1); }
139 	else if (ch <  0x800)		{ return (2); }
140 	else if (ch <  0x10000)		{ return (3); }
141 	else if (ch <  0x200000)	{ return (4); }
142 	else if (ch <  0x4000000)	{ return (5); }
143 	else if (ch <= 0x7fffffff)	{ return (6); }
144 
145 	AG_SetError("Bad UCS-4 character");
146 	return (-1);
147 }
148 
149 /*
150  * Return the number of bytes (not including the terminating NUL) that would
151  * be needed to encode the given UCS-4 string in UTF-8.
152  */
153 static __inline__ int
AG_LengthUTF8FromUCS4(const Uint32 * ucs4,size_t * rv)154 AG_LengthUTF8FromUCS4(const Uint32 *ucs4, size_t *rv)
155 {
156 	const Uint32 *c;
157 	int cLen;
158 
159 	*rv = 0;
160 	for (c = &ucs4[0]; *c != '\0'; c++) {
161 		if ((cLen = AG_CharLengthUTF8FromUCS4(*c)) == -1) {
162 			return (-1);
163 		}
164 		(*rv) += cLen;
165 	}
166 	return (0);
167 }
168 
/*
 * Classify `ch' as the first byte of a UTF-8 sequence and return the total
 * length of that sequence in bytes: 1 for a plain 7-bit byte, 2 to 6 for a
 * multi-byte lead byte.
 *
 * A byte that cannot start a sequence (bit pattern 10xxxxxx, or 0xfe/0xff)
 * sets the error message with AG_SetError() and yields -1.
 */
static __inline__ int
AG_CharLengthUTF8(unsigned char ch)
{
	/* Each test masks the prefix bits and compares them in place. */
	if ((ch & 0x80) == 0x00) {		/* 0xxxxxxx */
		return (1);
	}
	if ((ch & 0xe0) == 0xc0) {		/* 110xxxxx */
		return (2);
	}
	if ((ch & 0xf0) == 0xe0) {		/* 1110xxxx */
		return (3);
	}
	if ((ch & 0xf8) == 0xf0) {		/* 11110xxx */
		return (4);
	}
	if ((ch & 0xfc) == 0xf8) {		/* 111110xx */
		return (5);
	}
	if ((ch & 0xfe) == 0xfc) {		/* 1111110x */
		return (6);
	}
	AG_SetError("Bad UTF-8 sequence");
	return (-1);
}
196 
/*
 * Count the characters in the UTF-8 string `s', not counting the
 * terminating NUL, and store the result in *rv.
 *
 * Returns -1 if a byte that cannot start a UTF-8 sequence is found where a
 * lead byte is expected; *rv then holds the count up to that point.
 * Returns 0 otherwise. Only lead bytes are inspected; continuation bytes
 * are merely checked for NUL so that the scan never runs past the end of
 * the string.
 *
 * NOTE(review): a final sequence cut short by the terminating NUL is not
 * counted and is not reported as an error (0 is returned).
 */
static __inline__ int
AG_LengthUTF8(const char *s, size_t *rv)
{
	const char *p = s;
	int seqLen, k;

	*rv = 0;
	while (*p != '\0') {
		seqLen = AG_CharLengthUTF8((unsigned char)*p);
		if (seqLen == -1) {
			return (-1);
		}
		/* The lead byte is known to be non-NUL; check the rest. */
		for (k = 1; k < seqLen; k++) {
			if (p[k] == '\0') {
				return (0);
			}
		}
		(*rv)++;
		p += seqLen;
	}
	return (0);
}
224 
225 /*
226  * Compare two strings ignoring case.
227  */
228 static __inline__ int
AG_Strcasecmp(const char * s1,const char * s2)229 AG_Strcasecmp(const char *s1, const char *s2)
230 {
231 	const unsigned char *cm = agStrcasecmpMapASCII;
232 	const unsigned char *us1 = (const unsigned char *)s1;
233 	const unsigned char *us2 = (const unsigned char *)s2;
234 
235 	while (cm[*us1] == cm[*us2++]) {
236 		if (*us1++ == '\0')
237 			return (0);
238 	}
239 	return (cm[*us1] - cm[*--us2]);
240 }
241 
242 /*
243  * Compare the first n-characters of two strings ignoring case.
244  */
245 static __inline__ int
AG_Strncasecmp(const char * s1,const char * s2,size_t n)246 AG_Strncasecmp(const char *s1, const char *s2, size_t n)
247 {
248 	const unsigned char *cm = agStrcasecmpMapASCII;
249 	const unsigned char *us1 = (const unsigned char *)s1;
250 	const unsigned char *us2 = (const unsigned char *)s2;
251 	size_t i;
252 
253 	for (i = 0; i < n; i++) {
254 		if (cm[us1[i]] != cm[us2[i]])
255 			break;
256 	}
257 	return i == n ? 0 : cm[us1[i]] - cm[us2[i]];
258 }
259 __END_DECLS
260 
/*
 * Short, unprefixed aliases for the AG_* string routines. They are only
 * defined when _AGAR_INTERNAL or _USE_AGAR_STD is defined before this
 * header is included.
 */
#if defined(_AGAR_INTERNAL) || defined(_USE_AGAR_STD)
/* Bounded copy / concatenate */
# define Strlcpy	AG_Strlcpy
# define Strlcat	AG_Strlcat
# define StrlcpyInt	AG_StrlcpyInt
# define StrlcatInt	AG_StrlcatInt
# define StrlcpyUint	AG_StrlcpyUint
# define StrlcatUint	AG_StrlcatUint
/* Duplication and tokenizing */
# define Strdup		AG_Strdup
# define TryStrdup	AG_TryStrdup
# define Strsep		AG_Strsep
/* Comparison, search, reversal */
# define Strcasecmp	AG_Strcasecmp
# define Strncasecmp	AG_Strncasecmp
# define Strcasestr	AG_Strcasestr
# define StrReverse	AG_StrReverse
#endif /* _AGAR_INTERNAL || _USE_AGAR_STD */
276 
277 #include <agar/core/close.h>
278 #endif /* _AGAR_CORE_STRING_H_ */
279