1 /*-
2  * Copyright (c) 2003-2010 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  *
27  */
28 
29 #ifndef __LIBARCHIVE_BUILD
30 #ifndef __LIBARCHIVE_TEST
31 #error This header is only to be used internally to libarchive.
32 #endif
33 #endif
34 
35 #ifndef ARCHIVE_STRING_H_INCLUDED
36 #define	ARCHIVE_STRING_H_INCLUDED
37 
38 #include <stdarg.h>
39 #ifdef HAVE_STDLIB_H
40 #include <stdlib.h>  /* required for wchar_t on some systems */
41 #endif
42 #ifdef HAVE_STRING_H
43 #include <string.h>
44 #endif
45 #ifdef HAVE_WCHAR_H
46 #include <wchar.h>
47 #endif
48 
49 #include "archive.h"
50 
51 /*
52  * Basic resizable/reusable string support similar to Java's "StringBuffer."
53  *
54  * Unlike sbuf(9), the buffers here are fully reusable and track the
55  * length throughout.
56  */
57 
/* Narrow-character (char) string buffer. */
struct archive_string {
	/* Pointer to the storage. */
	char	*s;
	/* Length of 's' in characters. */
	size_t	 length;
	/* Length of the malloc-ed storage in bytes. */
	size_t	 buffer_length;
};
63 
/* Wide-character (wchar_t) string buffer. */
struct archive_wstring {
	/* Pointer to the storage. */
	wchar_t	*s;
	/* Length of 's' in characters. */
	size_t	 length;
	/* Length of the malloc-ed storage in bytes. */
	size_t	 buffer_length;
};
69 
/* Forward declaration only: this header refers to the string conversion
 * object solely through pointers. */
struct archive_string_conv;
71 
/*
 * Put an archive_string object (on the stack or elsewhere) into its
 * initial state: no storage, zero length.
 * Note: 'a' is evaluated more than once.
 */
#define	archive_string_init(a)			\
	do {					\
		(a)->s = NULL;			\
		(a)->length = 0;		\
		(a)->buffer_length = 0;		\
	} while (0)
75 
/* Append a single C char to an archive_string, resizing as necessary. */
struct archive_string *archive_strappend_char(struct archive_string *as,
    char c);
79 
/* Append a single wchar_t to an archive_wstring, resizing as necessary. */
struct archive_wstring *archive_wstrappend_wchar(struct archive_wstring *as,
    wchar_t c);
83 
/*
 * Convert a Unicode string to the current locale and append the result
 * to an archive_string.
 * Returns -1 if conversion fails.
 */
int archive_string_append_from_wcs(struct archive_string *as,
    const wchar_t *w, size_t len);
88 
89 
/*
 * Create a string conversion object.
 * Returns NULL and sets an error message if the conversion is not
 * supported on the platform.
 * NOTE(review): the trailing int argument is not described in this
 * header; check the implementation for its meaning.
 */
struct archive_string_conv *archive_string_conversion_to_charset(
    struct archive *a, const char *charset, int);
struct archive_string_conv *archive_string_conversion_from_charset(
    struct archive *a, const char *charset, int);
/*
 * Create the default string conversion object for reading/writing an
 * archive.
 * Returns NULL if the conversion is unneeded.
 * Note: on non-Windows platforms this always returns NULL.
 */
struct archive_string_conv *archive_string_default_conversion_for_read(
    struct archive *a);
struct archive_string_conv *archive_string_default_conversion_for_write(
    struct archive *a);
/*
 * Dispose of a string conversion object.
 * NOTE(review): the argument is the archive, not a conversion object --
 * presumably this releases the conversion object(s) associated with that
 * archive; confirm against the implementation.
 */
void archive_string_conversion_free(struct archive *a);
/* Charset name of a string conversion object (per the function name;
 * exact semantics are defined by the implementation). */
const char *archive_string_conversion_charset_name(
    struct archive_string_conv *sconv);
/*
 * Set an option on a string conversion object.  The SCONV_SET_OPT_*
 * constants below are the option values declared alongside it; each
 * occupies a distinct bit.
 */
void archive_string_conversion_set_opt(struct archive_string_conv *sconv,
    int opt);
#define	SCONV_SET_OPT_UTF8_LIBARCHIVE2X		1
#define	SCONV_SET_OPT_NORMALIZATION_C		2
#define	SCONV_SET_OPT_NORMALIZATION_D		4
115 
116 
/*
 * Copy a string into an archive_string, applying the given string
 * conversion.
 * Returns -1 if conversion fails.
 */
int archive_strncpy_l(struct archive_string *as, const void *p, size_t len,
    struct archive_string_conv *sconv);
122 
/*
 * Append a string (length given by 'len') to an archive_string, applying
 * the given string conversion.  This is the conversion-aware counterpart
 * of archive_strncat: unlike archive_strncpy_l it is named as an append,
 * not a copy.
 * Returns -1 if conversion fails.
 */
int archive_strncat_l(struct archive_string *as, const void *p, size_t len,
    struct archive_string_conv *sconv);
128 
129 
/*
 * Copy one archive_string (or archive_wstring) to another: the
 * destination length is reset to zero, then the source is concatenated.
 * Note: 'dest' is evaluated twice.
 */
#define	archive_string_copy(dest, src)			\
	((dest)->length = 0,				\
	    archive_string_concat((dest), (src)))
#define	archive_wstring_copy(dest, src)			\
	((dest)->length = 0,				\
	    archive_wstring_concat((dest), (src)))
135 
/* Concatenate 'src' onto the end of 'dest'. */
void archive_string_concat(struct archive_string *dest,
    struct archive_string *src);
void archive_wstring_concat(struct archive_wstring *dest,
    struct archive_wstring *src);
139 
/* Make sure the underlying buffer is at least as large as the requested
 * size. */
struct archive_string *archive_string_ensure(struct archive_string *as,
    size_t s);
struct archive_wstring *archive_wstring_ensure(struct archive_wstring *as,
    size_t s);
145 
/*
 * Append a C string that may lack a trailing \0.
 *
 * The narrow-string source is "const void *" on purpose: callers pass
 * "signed char *", "unsigned char *" and "char *" arguments, and a
 * "char *" parameter (as some of the other functions use) would only
 * force a lot of extra casts.
 */
struct archive_string *archive_strncat(struct archive_string *as,
    const void *p, size_t n);
struct archive_wstring *archive_wstrncat(struct archive_wstring *as,
    const wchar_t *p, size_t n);
155 
/* Append a C string to an archive_string (or a wide C string to an
 * archive_wstring), resizing as necessary. */
struct archive_string *archive_strcat(struct archive_string *as,
    const void *p);
struct archive_wstring *archive_wstrcat(struct archive_wstring *as,
    const wchar_t *p);
161 
/*
 * Copy a C string to an archive_string, resizing as necessary.
 * A NULL source is treated as a zero-length string.
 * Note: 'p' is evaluated more than once, so do not pass an expression
 * with side effects.
 */
#define	archive_strcpy(as,p)						\
	archive_strncpy((as), (p), ((p) != NULL ? strlen(p) : 0))
#define	archive_wstrcpy(as,p)						\
	archive_wstrncpy((as), (p), ((p) != NULL ? wcslen(p) : 0))
#define	archive_strcpy_l(as,p,lo)					\
	archive_strncpy_l((as), (p), ((p) != NULL ? strlen(p) : 0), (lo))
169 
/*
 * Copy a C string to an archive_string with a length limit, resizing as
 * necessary: the length is reset to zero, then the source is appended.
 * Note: 'as' is evaluated twice.
 */
#define	archive_strncpy(as,p,l)				\
	((as)->length = 0,				\
	    archive_strncat((as), (p), (l)))
#define	archive_wstrncpy(as,p,l)			\
	((as)->length = 0,				\
	    archive_wstrncat((as), (p), (l)))
175 
/* Current length of the string, read from the tracked 'length' field. */
#define	archive_strlen(a)	((a)->length)
178 
/* Reset the string length to zero; only the 'length' field is touched. */
#define	archive_string_empty(a)		((a)->length = 0)
#define	archive_wstring_empty(a)	((a)->length = 0)
182 
/* Release any storage resources allocated for the string. */
void archive_string_free(struct archive_string *as);
void archive_wstring_free(struct archive_wstring *as);
186 
187 /* Like 'vsprintf', but resizes the underlying string as necessary. */
188 /* Note: This only implements a small subset of standard printf functionality. */
189 void	archive_string_vsprintf(struct archive_string *, const char *,
190 	    va_list) __LA_PRINTF(2, 0);
191 void	archive_string_sprintf(struct archive_string *, const char *, ...)
192 	    __LA_PRINTF(2, 3);
193 
/*
 * Translate a multibyte string (MBS) to Unicode and append it to 'dest'.
 * Returns non-zero if conversion failed in any way.
 */
int archive_wstring_append_from_mbs(struct archive_wstring *dest,
    const char *p, size_t len);
198 
199 
200 /* A "multistring" can hold Unicode, UTF8, or MBS versions of
201  * the string.  If you set and read the same version, no translation
202  * is done.  If you set and read different versions, the library
203  * will attempt to transparently convert.
204  */
205 struct archive_mstring {
206 	struct archive_string aes_mbs;
207 	struct archive_string aes_utf8;
208 	struct archive_wstring aes_wcs;
209 	struct archive_string aes_mbs_in_locale;
210 	/* Bitmap of which of the above are valid.  Because we're lazy
211 	 * about malloc-ing and reusing the underlying storage, we
212 	 * can't rely on NULL pointers to indicate whether a string
213 	 * has been set. */
214 	int aes_set;
215 #define	AES_SET_MBS 1
216 #define	AES_SET_UTF8 2
217 #define	AES_SET_WCS 4
218 };
219 
/* Clean a multistring / copy one multistring ('src') into another
 * ('dest'). */
void archive_mstring_clean(struct archive_mstring *aes);
void archive_mstring_copy(struct archive_mstring *dest,
    struct archive_mstring *src);
/*
 * Fetch the MBS, UTF8, or wide-character form of a multistring through
 * the pointer-to-pointer argument.
 * NOTE(review): the return-value convention is not documented in this
 * header; confirm against the implementation.
 */
int archive_mstring_get_mbs(struct archive *a, struct archive_mstring *aes,
    const char **p);
int archive_mstring_get_utf8(struct archive *a, struct archive_mstring *aes,
    const char **p);
int archive_mstring_get_wcs(struct archive *a, struct archive_mstring *aes,
    const wchar_t **wp);
int archive_mstring_get_mbs_l(struct archive_mstring *aes, const char **p,
    size_t *length, struct archive_string_conv *sc);
/*
 * Store a string into a multistring from its MBS, UTF8, or
 * wide-character form.  The *_len variants take an explicit length and
 * the *_l variant additionally takes a string conversion object.
 * NOTE(review): the return-value convention is not documented in this
 * header; confirm against the implementation.
 */
int archive_mstring_copy_mbs(struct archive_mstring *aes, const char *mbs);
int archive_mstring_copy_mbs_len(struct archive_mstring *aes,
    const char *mbs, size_t len);
int archive_mstring_copy_utf8(struct archive_mstring *aes, const char *utf8);
int archive_mstring_copy_wcs(struct archive_mstring *aes,
    const wchar_t *wcs);
int archive_mstring_copy_wcs_len(struct archive_mstring *aes,
    const wchar_t *wcs, size_t len);
int archive_mstring_copy_mbs_len_l(struct archive_mstring *aes,
    const char *mbs, size_t len, struct archive_string_conv *sc);
int archive_mstring_update_utf8(struct archive *a,
    struct archive_mstring *aes, const char *utf8);
237 
238 
239 #endif
240