1caf54c4fSMartin Matuska /*- 26c95142eSMartin Matuska * Copyright (c) 2003-2010 Tim Kientzle 3caf54c4fSMartin Matuska * All rights reserved. 4caf54c4fSMartin Matuska * 5caf54c4fSMartin Matuska * Redistribution and use in source and binary forms, with or without 6caf54c4fSMartin Matuska * modification, are permitted provided that the following conditions 7caf54c4fSMartin Matuska * are met: 8caf54c4fSMartin Matuska * 1. Redistributions of source code must retain the above copyright 9caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer. 10caf54c4fSMartin Matuska * 2. Redistributions in binary form must reproduce the above copyright 11caf54c4fSMartin Matuska * notice, this list of conditions and the following disclaimer in the 12caf54c4fSMartin Matuska * documentation and/or other materials provided with the distribution. 13caf54c4fSMartin Matuska * 14caf54c4fSMartin Matuska * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15caf54c4fSMartin Matuska * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16caf54c4fSMartin Matuska * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17caf54c4fSMartin Matuska * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18caf54c4fSMartin Matuska * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19caf54c4fSMartin Matuska * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20caf54c4fSMartin Matuska * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21caf54c4fSMartin Matuska * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22caf54c4fSMartin Matuska * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23caf54c4fSMartin Matuska * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24caf54c4fSMartin Matuska */ 25caf54c4fSMartin Matuska 26f9762417SMartin Matuska #ifndef ARCHIVE_STRING_H_INCLUDED 27f9762417SMartin Matuska #define ARCHIVE_STRING_H_INCLUDED 28f9762417SMartin Matuska 29caf54c4fSMartin Matuska #ifndef __LIBARCHIVE_BUILD 306c95142eSMartin Matuska #ifndef __LIBARCHIVE_TEST 31caf54c4fSMartin Matuska #error This header is only to be used internally to libarchive. 32caf54c4fSMartin Matuska #endif 336c95142eSMartin Matuska #endif 34caf54c4fSMartin Matuska 35caf54c4fSMartin Matuska #include <stdarg.h> 36caf54c4fSMartin Matuska #ifdef HAVE_STDLIB_H 37caf54c4fSMartin Matuska #include <stdlib.h> /* required for wchar_t on some systems */ 38caf54c4fSMartin Matuska #endif 39caf54c4fSMartin Matuska #ifdef HAVE_STRING_H 40caf54c4fSMartin Matuska #include <string.h> 41caf54c4fSMartin Matuska #endif 42caf54c4fSMartin Matuska #ifdef HAVE_WCHAR_H 43caf54c4fSMartin Matuska #include <wchar.h> 44caf54c4fSMartin Matuska #endif 45caf54c4fSMartin Matuska 46caf54c4fSMartin Matuska #include "archive.h" 47caf54c4fSMartin Matuska 48caf54c4fSMartin Matuska /* 496c95142eSMartin Matuska * Basic resizable/reusable string support similar to Java's "StringBuffer." 50caf54c4fSMartin Matuska * 51caf54c4fSMartin Matuska * Unlike sbuf(9), the buffers here are fully reusable and track the 52caf54c4fSMartin Matuska * length throughout. 53caf54c4fSMartin Matuska */ 54caf54c4fSMartin Matuska 55caf54c4fSMartin Matuska struct archive_string { 56caf54c4fSMartin Matuska char *s; /* Pointer to the storage */ 576c95142eSMartin Matuska size_t length; /* Length of 's' in characters */ 586c95142eSMartin Matuska size_t buffer_length; /* Length of malloc-ed storage in bytes. */ 59caf54c4fSMartin Matuska }; 60caf54c4fSMartin Matuska 616c95142eSMartin Matuska struct archive_wstring { 626c95142eSMartin Matuska wchar_t *s; /* Pointer to the storage */ 636c95142eSMartin Matuska size_t length; /* Length of 's' in characters */ 646c95142eSMartin Matuska size_t buffer_length; /* Length of malloc-ed storage in bytes. */ 656c95142eSMartin Matuska }; 666c95142eSMartin Matuska 676c95142eSMartin Matuska struct archive_string_conv; 686c95142eSMartin Matuska 69caf54c4fSMartin Matuska /* Initialize an archive_string object on the stack or elsewhere. */ 70caf54c4fSMartin Matuska #define archive_string_init(a) \ 71caf54c4fSMartin Matuska do { (a)->s = NULL; (a)->length = 0; (a)->buffer_length = 0; } while(0) 72caf54c4fSMartin Matuska 73caf54c4fSMartin Matuska /* Append a C char to an archive_string, resizing as necessary. */ 74caf54c4fSMartin Matuska struct archive_string * 756c95142eSMartin Matuska archive_strappend_char(struct archive_string *, char); 76caf54c4fSMartin Matuska 776c95142eSMartin Matuska /* Ditto for a wchar_t and an archive_wstring. */ 786c95142eSMartin Matuska struct archive_wstring * 796c95142eSMartin Matuska archive_wstrappend_wchar(struct archive_wstring *, wchar_t); 80caf54c4fSMartin Matuska 812dbf8c4aSMartin Matuska /* Append a raw array to an archive_string, resizing as necessary */ 822dbf8c4aSMartin Matuska struct archive_string * 832dbf8c4aSMartin Matuska archive_array_append(struct archive_string *, const char *, size_t); 842dbf8c4aSMartin Matuska 856c95142eSMartin Matuska /* Convert a Unicode string to current locale and append the result. */ 866c95142eSMartin Matuska /* Returns -1 if conversion fails. */ 876c95142eSMartin Matuska int 886c95142eSMartin Matuska archive_string_append_from_wcs(struct archive_string *, const wchar_t *, size_t); 89caf54c4fSMartin Matuska 906c95142eSMartin Matuska 916c95142eSMartin Matuska /* Create a string conversion object. 926c95142eSMartin Matuska * Return NULL and set a error message if the conversion is not supported 936c95142eSMartin Matuska * on the platform. */ 946c95142eSMartin Matuska struct archive_string_conv * 956c95142eSMartin Matuska archive_string_conversion_to_charset(struct archive *, const char *, int); 966c95142eSMartin Matuska struct archive_string_conv * 976c95142eSMartin Matuska archive_string_conversion_from_charset(struct archive *, const char *, int); 986c95142eSMartin Matuska /* Create the default string conversion object for reading/writing an archive. 996c95142eSMartin Matuska * Return NULL if the conversion is unneeded. 1006c95142eSMartin Matuska * Note: On non Windows platform this always returns NULL. 1016c95142eSMartin Matuska */ 1026c95142eSMartin Matuska struct archive_string_conv * 1036c95142eSMartin Matuska archive_string_default_conversion_for_read(struct archive *); 1046c95142eSMartin Matuska struct archive_string_conv * 1056c95142eSMartin Matuska archive_string_default_conversion_for_write(struct archive *); 1066c95142eSMartin Matuska /* Dispose of a string conversion object. */ 1076c95142eSMartin Matuska void 1086c95142eSMartin Matuska archive_string_conversion_free(struct archive *); 1096c95142eSMartin Matuska const char * 1106c95142eSMartin Matuska archive_string_conversion_charset_name(struct archive_string_conv *); 1116c95142eSMartin Matuska void 1126c95142eSMartin Matuska archive_string_conversion_set_opt(struct archive_string_conv *, int); 1136c95142eSMartin Matuska #define SCONV_SET_OPT_UTF8_LIBARCHIVE2X 1 114fd082e96SMartin Matuska #define SCONV_SET_OPT_NORMALIZATION_C 2 115fd082e96SMartin Matuska #define SCONV_SET_OPT_NORMALIZATION_D 4 1166c95142eSMartin Matuska 1176c95142eSMartin Matuska 1186c95142eSMartin Matuska /* Copy one archive_string to another in locale conversion. 119a2e802b7SMartin Matuska * Return -1 if conversion fails. */ 1206c95142eSMartin Matuska int 121fd082e96SMartin Matuska archive_strncpy_l(struct archive_string *, const void *, size_t, 1226c95142eSMartin Matuska struct archive_string_conv *); 1236c95142eSMartin Matuska 1246c95142eSMartin Matuska /* Copy one archive_string to another in locale conversion. 125a2e802b7SMartin Matuska * Return -1 if conversion fails. */ 1266c95142eSMartin Matuska int 127fd082e96SMartin Matuska archive_strncat_l(struct archive_string *, const void *, size_t, 1286c95142eSMartin Matuska struct archive_string_conv *); 1296c95142eSMartin Matuska 130caf54c4fSMartin Matuska 131caf54c4fSMartin Matuska /* Copy one archive_string to another */ 132caf54c4fSMartin Matuska #define archive_string_copy(dest, src) \ 1336c95142eSMartin Matuska ((dest)->length = 0, archive_string_concat((dest), (src))) 1346c95142eSMartin Matuska #define archive_wstring_copy(dest, src) \ 1356c95142eSMartin Matuska ((dest)->length = 0, archive_wstring_concat((dest), (src))) 136caf54c4fSMartin Matuska 137caf54c4fSMartin Matuska /* Concatenate one archive_string to another */ 1386c95142eSMartin Matuska void archive_string_concat(struct archive_string *dest, struct archive_string *src); 1396c95142eSMartin Matuska void archive_wstring_concat(struct archive_wstring *dest, struct archive_wstring *src); 140caf54c4fSMartin Matuska 141caf54c4fSMartin Matuska /* Ensure that the underlying buffer is at least as large as the request. */ 142caf54c4fSMartin Matuska struct archive_string * 1436c95142eSMartin Matuska archive_string_ensure(struct archive_string *, size_t); 1446c95142eSMartin Matuska struct archive_wstring * 1456c95142eSMartin Matuska archive_wstring_ensure(struct archive_wstring *, size_t); 146caf54c4fSMartin Matuska 147caf54c4fSMartin Matuska /* Append C string, which may lack trailing \0. */ 148caf54c4fSMartin Matuska /* The source is declared void * here because this gets used with 149caf54c4fSMartin Matuska * "signed char *", "unsigned char *" and "char *" arguments. 150caf54c4fSMartin Matuska * Declaring it "char *" as with some of the other functions just 151caf54c4fSMartin Matuska * leads to a lot of extra casts. */ 152caf54c4fSMartin Matuska struct archive_string * 1536c95142eSMartin Matuska archive_strncat(struct archive_string *, const void *, size_t); 1546c95142eSMartin Matuska struct archive_wstring * 1556c95142eSMartin Matuska archive_wstrncat(struct archive_wstring *, const wchar_t *, size_t); 156caf54c4fSMartin Matuska 157caf54c4fSMartin Matuska /* Append a C string to an archive_string, resizing as necessary. */ 1586c95142eSMartin Matuska struct archive_string * 1596c95142eSMartin Matuska archive_strcat(struct archive_string *, const void *); 1606c95142eSMartin Matuska struct archive_wstring * 1616c95142eSMartin Matuska archive_wstrcat(struct archive_wstring *, const wchar_t *); 162caf54c4fSMartin Matuska 163caf54c4fSMartin Matuska /* Copy a C string to an archive_string, resizing as necessary. */ 164caf54c4fSMartin Matuska #define archive_strcpy(as,p) \ 1656c95142eSMartin Matuska archive_strncpy((as), (p), ((p) == NULL ? 0 : strlen(p))) 1666c95142eSMartin Matuska #define archive_wstrcpy(as,p) \ 1676c95142eSMartin Matuska archive_wstrncpy((as), (p), ((p) == NULL ? 0 : wcslen(p))) 168fd082e96SMartin Matuska #define archive_strcpy_l(as,p,lo) \ 169fd082e96SMartin Matuska archive_strncpy_l((as), (p), ((p) == NULL ? 0 : strlen(p)), (lo)) 170caf54c4fSMartin Matuska 171caf54c4fSMartin Matuska /* Copy a C string to an archive_string with limit, resizing as necessary. */ 172caf54c4fSMartin Matuska #define archive_strncpy(as,p,l) \ 173caf54c4fSMartin Matuska ((as)->length=0, archive_strncat((as), (p), (l))) 1746c95142eSMartin Matuska #define archive_wstrncpy(as,p,l) \ 1756c95142eSMartin Matuska ((as)->length = 0, archive_wstrncat((as), (p), (l))) 176caf54c4fSMartin Matuska 177caf54c4fSMartin Matuska /* Return length of string. */ 178caf54c4fSMartin Matuska #define archive_strlen(a) ((a)->length) 179caf54c4fSMartin Matuska 180caf54c4fSMartin Matuska /* Set string length to zero. */ 181caf54c4fSMartin Matuska #define archive_string_empty(a) ((a)->length = 0) 1826c95142eSMartin Matuska #define archive_wstring_empty(a) ((a)->length = 0) 183caf54c4fSMartin Matuska 184caf54c4fSMartin Matuska /* Release any allocated storage resources. */ 1856c95142eSMartin Matuska void archive_string_free(struct archive_string *); 1866c95142eSMartin Matuska void archive_wstring_free(struct archive_wstring *); 187caf54c4fSMartin Matuska 188caf54c4fSMartin Matuska /* Like 'vsprintf', but resizes the underlying string as necessary. */ 1896c95142eSMartin Matuska /* Note: This only implements a small subset of standard printf functionality. */ 1906c95142eSMartin Matuska void archive_string_vsprintf(struct archive_string *, const char *, 191caf54c4fSMartin Matuska va_list) __LA_PRINTF(2, 0); 1926c95142eSMartin Matuska void archive_string_sprintf(struct archive_string *, const char *, ...) 193caf54c4fSMartin Matuska __LA_PRINTF(2, 3); 194caf54c4fSMartin Matuska 1956c95142eSMartin Matuska /* Translates from MBS to Unicode. */ 1966c95142eSMartin Matuska /* Returns non-zero if conversion failed in any way. */ 1976c95142eSMartin Matuska int archive_wstring_append_from_mbs(struct archive_wstring *dest, 1986c95142eSMartin Matuska const char *, size_t); 1996c95142eSMartin Matuska 2006c95142eSMartin Matuska 2016c95142eSMartin Matuska /* A "multistring" can hold Unicode, UTF8, or MBS versions of 2026c95142eSMartin Matuska * the string. If you set and read the same version, no translation 2036c95142eSMartin Matuska * is done. If you set and read different versions, the library 2046c95142eSMartin Matuska * will attempt to transparently convert. 2056c95142eSMartin Matuska */ 2066c95142eSMartin Matuska struct archive_mstring { 2076c95142eSMartin Matuska struct archive_string aes_mbs; 2086c95142eSMartin Matuska struct archive_string aes_utf8; 2096c95142eSMartin Matuska struct archive_wstring aes_wcs; 2106c95142eSMartin Matuska struct archive_string aes_mbs_in_locale; 2116c95142eSMartin Matuska /* Bitmap of which of the above are valid. Because we're lazy 2126c95142eSMartin Matuska * about malloc-ing and reusing the underlying storage, we 2136c95142eSMartin Matuska * can't rely on NULL pointers to indicate whether a string 2146c95142eSMartin Matuska * has been set. */ 2156c95142eSMartin Matuska int aes_set; 2166c95142eSMartin Matuska #define AES_SET_MBS 1 2176c95142eSMartin Matuska #define AES_SET_UTF8 2 2186c95142eSMartin Matuska #define AES_SET_WCS 4 2196c95142eSMartin Matuska }; 2206c95142eSMartin Matuska 2216c95142eSMartin Matuska void archive_mstring_clean(struct archive_mstring *); 2226c95142eSMartin Matuska void archive_mstring_copy(struct archive_mstring *dest, struct archive_mstring *src); 2236c95142eSMartin Matuska int archive_mstring_get_mbs(struct archive *, struct archive_mstring *, const char **); 2246c95142eSMartin Matuska int archive_mstring_get_utf8(struct archive *, struct archive_mstring *, const char **); 2256c95142eSMartin Matuska int archive_mstring_get_wcs(struct archive *, struct archive_mstring *, const wchar_t **); 226c3afd20fSMartin Matuska int archive_mstring_get_mbs_l(struct archive *, struct archive_mstring *, const char **, 2276c95142eSMartin Matuska size_t *, struct archive_string_conv *); 2286c95142eSMartin Matuska int archive_mstring_copy_mbs(struct archive_mstring *, const char *mbs); 2296c95142eSMartin Matuska int archive_mstring_copy_mbs_len(struct archive_mstring *, const char *mbs, 2306c95142eSMartin Matuska size_t); 2316c95142eSMartin Matuska int archive_mstring_copy_utf8(struct archive_mstring *, const char *utf8); 2326c95142eSMartin Matuska int archive_mstring_copy_wcs(struct archive_mstring *, const wchar_t *wcs); 2336c95142eSMartin Matuska int archive_mstring_copy_wcs_len(struct archive_mstring *, 2346c95142eSMartin Matuska const wchar_t *wcs, size_t); 2356c95142eSMartin Matuska int archive_mstring_copy_mbs_len_l(struct archive_mstring *, 2366c95142eSMartin Matuska const char *mbs, size_t, struct archive_string_conv *); 2376c95142eSMartin Matuska int archive_mstring_update_utf8(struct archive *, struct archive_mstring *aes, const char *utf8); 238caf54c4fSMartin Matuska 239caf54c4fSMartin Matuska 240caf54c4fSMartin Matuska #endif 241