/*-
 * Copyright (c) 2003-2010 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

26f9762417SMartin Matuska #ifndef ARCHIVE_STRING_H_INCLUDED
27f9762417SMartin Matuska #define ARCHIVE_STRING_H_INCLUDED
28f9762417SMartin Matuska 
29caf54c4fSMartin Matuska #ifndef __LIBARCHIVE_BUILD
306c95142eSMartin Matuska #ifndef __LIBARCHIVE_TEST
31caf54c4fSMartin Matuska #error This header is only to be used internally to libarchive.
32caf54c4fSMartin Matuska #endif
336c95142eSMartin Matuska #endif
34caf54c4fSMartin Matuska 
35caf54c4fSMartin Matuska #include <stdarg.h>
36caf54c4fSMartin Matuska #ifdef HAVE_STDLIB_H
37caf54c4fSMartin Matuska #include <stdlib.h>  /* required for wchar_t on some systems */
38caf54c4fSMartin Matuska #endif
39caf54c4fSMartin Matuska #ifdef HAVE_STRING_H
40caf54c4fSMartin Matuska #include <string.h>
41caf54c4fSMartin Matuska #endif
42caf54c4fSMartin Matuska #ifdef HAVE_WCHAR_H
43caf54c4fSMartin Matuska #include <wchar.h>
44caf54c4fSMartin Matuska #endif
45caf54c4fSMartin Matuska 
46caf54c4fSMartin Matuska #include "archive.h"
47caf54c4fSMartin Matuska 
/*
 * Basic resizable/reusable string support similar to Java's "StringBuffer."
 *
 * Unlike sbuf(9), the buffers here are fully reusable and track the
 * length throughout.
 */

/*
 * Resizable narrow-character string: a storage pointer together with the
 * number of characters in use and the size of the allocation behind it.
 */
struct archive_string {
	char	*s;  /* Pointer to the storage */
	size_t	 length; /* Length of 's' in characters */
	size_t	 buffer_length; /* Length of malloc-ed storage in bytes. */
};

/*
 * Wide-character counterpart of struct archive_string; same three fields,
 * with wchar_t storage.
 */
struct archive_wstring {
	wchar_t	*s;  /* Pointer to the storage */
	size_t	 length; /* Length of 's' in characters */
	size_t	 buffer_length; /* Length of malloc-ed storage in bytes. */
};

/* Opaque string conversion object; this header only handles it by pointer. */
struct archive_string_conv;

/*
 * Initialize an archive_string object on the stack or elsewhere.
 *
 * Sets the storage pointer to NULL and both lengths to zero.  Nothing is
 * released: storage previously referenced by 'a' is simply forgotten.
 * 'a' is evaluated three times, so it must be free of side effects.
 */
#define	archive_string_init(a)	\
	do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0)

/* Append a C char to an archive_string, resizing as necessary. */
struct archive_string *
archive_strappend_char(struct archive_string *, char);

/* Ditto for a wchar_t and an archive_wstring. */
struct archive_wstring *
archive_wstrappend_wchar(struct archive_wstring *, wchar_t);

/* Append a raw array to an archive_string, resizing as necessary */
struct archive_string *
archive_array_append(struct archive_string *, const char *, size_t);

/* Convert a Unicode string to current locale and append the result. */
/* Returns -1 if conversion fails. */
/* NOTE(review): the size_t is presumably the number of wide characters
 * to convert -- confirm against the implementation. */
int
archive_string_append_from_wcs(struct archive_string *, const wchar_t *, size_t);


/* Create a string conversion object.
 * Return NULL and set an error message if the conversion is not supported
 * on the platform. */
struct archive_string_conv *
archive_string_conversion_to_charset(struct archive *, const char *, int);
struct archive_string_conv *
archive_string_conversion_from_charset(struct archive *, const char *, int);
/* Create the default string conversion object for reading/writing an archive.
 * Return NULL if the conversion is unneeded.
 * Note: On non-Windows platforms this always returns NULL.
 */
struct archive_string_conv *
archive_string_default_conversion_for_read(struct archive *);
struct archive_string_conv *
archive_string_default_conversion_for_write(struct archive *);
/* Dispose of a string conversion object. */
/* NOTE(review): the argument is the archive, not a single conversion
 * object; presumably this releases the conversions held by that archive --
 * confirm against the implementation. */
void
archive_string_conversion_free(struct archive *);
/* Report the charset name of a conversion object. */
const char *
archive_string_conversion_charset_name(struct archive_string_conv *);
/* Set an option on a conversion object; the int is presumably one of the
 * SCONV_SET_OPT_* values below (confirm against the implementation). */
void
archive_string_conversion_set_opt(struct archive_string_conv *, int);
#define SCONV_SET_OPT_UTF8_LIBARCHIVE2X	1
#define SCONV_SET_OPT_NORMALIZATION_C	2
#define SCONV_SET_OPT_NORMALIZATION_D	4


/* Copy a raw buffer (pointer plus length) into an archive_string, passing
 * it through the supplied conversion object.
 * Return -1 if conversion fails. */
int
archive_strncpy_l(struct archive_string *, const void *, size_t,
    struct archive_string_conv *);

/* Append counterpart of archive_strncpy_l: same arguments, but the
 * converted result is added to the end of the archive_string.
 * Return -1 if conversion fails. */
int
archive_strncat_l(struct archive_string *, const void *, size_t,
    struct archive_string_conv *);


/*
 * Copy one archive_string to another.
 *
 * The destination length is reset to zero and the source is then
 * concatenated onto it; the value of the expression is whatever the
 * concat call yields.  'dest' is evaluated twice, so it must be free of
 * side effects.
 */
#define	archive_string_copy(dest, src) \
	((dest)->length = 0, archive_string_concat((dest), (src)))
/* Same operation for archive_wstring objects. */
#define	archive_wstring_copy(dest, src) \
	((dest)->length = 0, archive_wstring_concat((dest), (src)))

/* Concatenate one archive_string to another: 'src' is appended to 'dest'. */
void archive_string_concat(struct archive_string *dest, struct archive_string *src);
/* Same, for archive_wstring objects. */
void archive_wstring_concat(struct archive_wstring *dest, struct archive_wstring *src);

/* Ensure that the underlying buffer is at least as large as the request. */
/* NOTE(review): the returned pointer is presumably NULL when the buffer
 * cannot be grown -- confirm against the implementation. */
struct archive_string *
archive_string_ensure(struct archive_string *, size_t);
struct archive_wstring *
archive_wstring_ensure(struct archive_wstring *, size_t);

/* Append C string, which may lack trailing \0. */
/* The source is declared void * here because this gets used with
 * "signed char *", "unsigned char *" and "char *" arguments.
 * Declaring it "char *" as with some of the other functions just
 * leads to a lot of extra casts. */
struct archive_string *
archive_strncat(struct archive_string *, const void *, size_t);
/* Wide-character variant; the source here is typed as const wchar_t *. */
struct archive_wstring *
archive_wstrncat(struct archive_wstring *, const wchar_t *, size_t);

/* Append a C string to an archive_string, resizing as necessary. */
struct archive_string *
archive_strcat(struct archive_string *, const void *);
/* Wide-character variant for archive_wstring. */
struct archive_wstring *
archive_wstrcat(struct archive_wstring *, const wchar_t *);

/*
 * Copy a C string to an archive_string, resizing as necessary.
 *
 * Each macro forwards to its length-taking counterpart, passing strlen(p)
 * (or wcslen(p)) as the length, or 0 when 'p' is NULL.  'p' is evaluated
 * more than once, so it must be free of side effects.
 */
#define	archive_strcpy(as,p) \
	archive_strncpy((as), (p), ((p) == NULL ? 0 : strlen(p)))
#define	archive_wstrcpy(as,p) \
	archive_wstrncpy((as), (p), ((p) == NULL ? 0 : wcslen(p)))
/* As archive_strcpy, but forwards to archive_strncpy_l with the extra
 * argument 'lo' passed through unchanged. */
#define	archive_strcpy_l(as,p,lo) \
	archive_strncpy_l((as), (p), ((p) == NULL ? 0 : strlen(p)), (lo))

/*
 * Copy a C string to an archive_string with limit, resizing as necessary.
 *
 * Implemented as "reset the length to zero, then append": the value of the
 * expression is whatever the strncat call yields.  'as' is evaluated
 * twice, so it must be free of side effects.
 */
#define	archive_strncpy(as,p,l) \
	((as)->length=0, archive_strncat((as), (p), (l)))
#define	archive_wstrncpy(as,p,l) \
	((as)->length = 0, archive_wstrncat((as), (p), (l)))

/* Return length of string: reads the stored 'length' field, no scanning. */
#define	archive_strlen(a) ((a)->length)

/* Set string length to zero.  Only the 'length' field is written; the
 * storage pointer and buffer size are left as they are. */
#define	archive_string_empty(a) ((a)->length = 0)
#define	archive_wstring_empty(a) ((a)->length = 0)

/* Release any allocated storage resources. */
void	archive_string_free(struct archive_string *);
void	archive_wstring_free(struct archive_wstring *);

188caf54c4fSMartin Matuska /* Like 'vsprintf', but resizes the underlying string as necessary. */
1896c95142eSMartin Matuska /* Note: This only implements a small subset of standard printf functionality. */
1906c95142eSMartin Matuska void	archive_string_vsprintf(struct archive_string *, const char *,
191caf54c4fSMartin Matuska 	    va_list) __LA_PRINTF(2, 0);
1926c95142eSMartin Matuska void	archive_string_sprintf(struct archive_string *, const char *, ...)
193caf54c4fSMartin Matuska 	    __LA_PRINTF(2, 3);
194caf54c4fSMartin Matuska 
/* Translates from MBS to Unicode. */
/* Returns non-zero if conversion failed in any way. */
/* NOTE(review): the size_t is presumably the length of the multibyte
 * input in bytes -- confirm against the implementation. */
int archive_wstring_append_from_mbs(struct archive_wstring *dest,
    const char *, size_t);


2016c95142eSMartin Matuska /* A "multistring" can hold Unicode, UTF8, or MBS versions of
2026c95142eSMartin Matuska  * the string.  If you set and read the same version, no translation
2036c95142eSMartin Matuska  * is done.  If you set and read different versions, the library
2046c95142eSMartin Matuska  * will attempt to transparently convert.
2056c95142eSMartin Matuska  */
2066c95142eSMartin Matuska struct archive_mstring {
2076c95142eSMartin Matuska 	struct archive_string aes_mbs;
2086c95142eSMartin Matuska 	struct archive_string aes_utf8;
2096c95142eSMartin Matuska 	struct archive_wstring aes_wcs;
2106c95142eSMartin Matuska 	struct archive_string aes_mbs_in_locale;
2116c95142eSMartin Matuska 	/* Bitmap of which of the above are valid.  Because we're lazy
2126c95142eSMartin Matuska 	 * about malloc-ing and reusing the underlying storage, we
2136c95142eSMartin Matuska 	 * can't rely on NULL pointers to indicate whether a string
2146c95142eSMartin Matuska 	 * has been set. */
2156c95142eSMartin Matuska 	int aes_set;
2166c95142eSMartin Matuska #define	AES_SET_MBS 1
2176c95142eSMartin Matuska #define	AES_SET_UTF8 2
2186c95142eSMartin Matuska #define	AES_SET_WCS 4
2196c95142eSMartin Matuska };
2206c95142eSMartin Matuska 
/*
 * Operations on struct archive_mstring.  Only the prototypes live here.
 * The get_* functions hand the requested form back through their final
 * pointer argument(s); the copy_* functions take the new contents in the
 * form named by their suffix (mbs, utf8, wcs), the *_len variants with an
 * explicit length and the *_l variants with a conversion object.
 * NOTE(review): the meaning of the int return values is not stated in
 * this header -- consult the implementation.
 */
void	archive_mstring_clean(struct archive_mstring *);
void	archive_mstring_copy(struct archive_mstring *dest, struct archive_mstring *src);
int	archive_mstring_get_mbs(struct archive *, struct archive_mstring *, const char **);
int	archive_mstring_get_utf8(struct archive *, struct archive_mstring *, const char **);
int	archive_mstring_get_wcs(struct archive *, struct archive_mstring *, const wchar_t **);
int	archive_mstring_get_mbs_l(struct archive *, struct archive_mstring *, const char **,
	    size_t *, struct archive_string_conv *);
int	archive_mstring_copy_mbs(struct archive_mstring *, const char *mbs);
int	archive_mstring_copy_mbs_len(struct archive_mstring *, const char *mbs,
	    size_t);
int	archive_mstring_copy_utf8(struct archive_mstring *, const char *utf8);
int	archive_mstring_copy_wcs(struct archive_mstring *, const wchar_t *wcs);
int	archive_mstring_copy_wcs_len(struct archive_mstring *,
	    const wchar_t *wcs, size_t);
int	archive_mstring_copy_mbs_len_l(struct archive_mstring *,
	    const char *mbs, size_t, struct archive_string_conv *);
int	archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8);


240caf54c4fSMartin Matuska #endif
241