1 /* 2 Unix SMB/CIFS implementation. 3 charset defines 4 Copyright (C) Andrew Tridgell 2001 5 Copyright (C) Jelmer Vernooij 2002 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 /* This is a public header file that is installed as part of Samba. 22 * If you remove any functions or change their signature, update 23 * the so version number. */ 24 25 #ifndef __CHARSET_H__ 26 #define __CHARSET_H__ 27 28 #include <talloc.h> 29 30 /* this defines the charset types used in samba */ 31 typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; 32 33 #define NUM_CHARSETS 7 34 35 /* 36 * SMB UCS2 (16-bit unicode) internal type. 37 * smb_ucs2_t is *always* in little endian format. 38 */ 39 40 typedef uint16_t smb_ucs2_t; 41 42 #ifdef WORDS_BIGENDIAN 43 #define UCS2_SHIFT 8 44 #else 45 #define UCS2_SHIFT 0 46 #endif 47 48 /* turn a 7 bit character into a ucs2 character */ 49 #define UCS2_CHAR(c) ((c) << UCS2_SHIFT) 50 51 /* 52 * for each charset we have a function that pulls from that charset to 53 * a ucs2 buffer, and a function that pushes to a ucs2 buffer 54 * */ 55 56 struct charset_functions { 57 const char *name; 58 size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft, 59 char **outbuf, size_t *outbytesleft); 60 size_t (*push)(void *, const char **inbuf, size_t *inbytesleft, 61 char **outbuf, size_t *outbytesleft); 62 bool samba_internal_charset; 63 }; 64 65 /* this type is used for manipulating unicode codepoints */ 66 typedef uint32_t codepoint_t; 67 68 #define INVALID_CODEPOINT ((codepoint_t)-1) 69 70 /* generic iconv conversion structure */ 71 typedef struct smb_iconv_s { 72 size_t (*direct)(void *cd, const char **inbuf, size_t *inbytesleft, 73 char **outbuf, size_t *outbytesleft); 74 size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft, 75 char **outbuf, size_t *outbytesleft); 76 size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft, 77 char **outbuf, size_t *outbytesleft); 78 void *cd_direct, *cd_pull, *cd_push; 79 char *from_name, *to_name; 80 } *smb_iconv_t; 81 82 /* string manipulation flags */ 83 #define STR_TERMINATE 1 84 #define STR_UPPER 2 85 #define STR_ASCII 4 86 #define STR_UNICODE 8 87 #define STR_NOALIGN 16 88 #define STR_NO_RANGE_CHECK 32 89 #define STR_LEN8BIT 64 90 #define STR_TERMINATE_ASCII 128 /* only terminate if ascii */ 91 #define STR_LEN_NOTERM 256 /* the length field is the unterminated length */ 92 93 struct loadparm_context; 94 struct smb_iconv_handle; 95 96 char *strchr_m(const char *s, char c); 97 /** 98 * Calculate the number of units (8 or 16-bit, depending on the 99 * destination charset), that would be needed to convert the input 100 * string which is expected to be in in src_charset encoding to the 101 * destination charset (which should be a unicode charset). 102 */ 103 size_t strlen_m_ext_handle(struct smb_iconv_handle *ic, 104 const char *s, charset_t src_charset, charset_t dst_charset); 105 size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset); 106 size_t strlen_m_ext_term(const char *s, charset_t src_charset, 107 charset_t dst_charset); 108 size_t strlen_m_ext_term_null(const char *s, 109 charset_t src_charset, 110 charset_t dst_charset); 111 size_t strlen_m(const char *s); 112 size_t strlen_m_term(const char *s); 113 size_t strlen_m_term_null(const char *s); 114 char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength); 115 void string_replace_m(char *s, char oldc, char newc); 116 bool strcsequal(const char *s1,const char *s2); 117 bool strequal_m(const char *s1, const char *s2); 118 int strncasecmp_m(const char *s1, const char *s2, size_t n); 119 int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle, 120 const char *s1, const char *s2, size_t n); 121 bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize); 122 int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle, 123 const char *s1, const char *s2); 124 int strcasecmp_m(const char *s1, const char *s2); 125 size_t count_chars_m(const char *s, char c); 126 char *strupper_talloc(TALLOC_CTX *ctx, const char *src); 127 char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src); 128 char *strupper_talloc_n_handle(struct smb_iconv_handle *iconv_handle, 129 TALLOC_CTX *ctx, const char *src, size_t n); 130 char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n); 131 char *strlower_talloc_handle(struct smb_iconv_handle *iconv_handle, 132 TALLOC_CTX *ctx, const char *src); 133 char *strlower_talloc(TALLOC_CTX *ctx, const char *src); 134 bool strhasupper(const char *string); 135 bool strhaslower_handle(struct smb_iconv_handle *ic, 136 const char *string); 137 bool strhaslower(const char *string); 138 bool strhasupper_handle(struct smb_iconv_handle *ic, 139 const char *string); 140 char *strrchr_m(const char *s, char c); 141 char *strchr_m(const char *s, char c); 142 char *strstr_m(const char *src, const char *findstr); 143 144 bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 145 bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size); 146 bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 147 bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 148 bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size); 149 bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 150 ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags); 151 ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags); 152 153 bool convert_string_talloc(TALLOC_CTX *ctx, 154 charset_t from, charset_t to, 155 void const *src, size_t srclen, 156 void *dest, size_t *converted_size); 157 158 bool convert_string(charset_t from, charset_t to, 159 void const *src, size_t srclen, 160 void *dest, size_t destlen, 161 size_t *converted_size); 162 bool convert_string_error(charset_t from, charset_t to, 163 void const *src, size_t srclen, 164 void *dest, size_t destlen, 165 size_t *converted_size); 166 167 struct smb_iconv_handle *get_iconv_handle(void); 168 struct smb_iconv_handle *get_iconv_testing_handle(TALLOC_CTX *mem_ctx, 169 const char *dos_charset, 170 const char *unix_charset, 171 bool use_builtin_handlers); 172 struct smb_iconv_handle *reinit_iconv_handle(TALLOC_CTX *mem_ctx, 173 const char *dos_charset, 174 const char *unix_charset); 175 void free_iconv_handle(void); 176 177 smb_iconv_t get_conv_handle(struct smb_iconv_handle *ic, 178 charset_t from, charset_t to); 179 const char *charset_name(struct smb_iconv_handle *ic, charset_t ch); 180 181 codepoint_t next_codepoint_ext(const char *str, size_t len, 182 charset_t src_charset, size_t *size); 183 codepoint_t next_codepoint(const char *str, size_t *size); 184 ssize_t push_codepoint(char *str, codepoint_t c); 185 186 /* codepoints */ 187 codepoint_t next_codepoint_handle_ext(struct smb_iconv_handle *ic, 188 const char *str, size_t len, 189 charset_t src_charset, 190 size_t *size); 191 codepoint_t next_codepoint_handle(struct smb_iconv_handle *ic, 192 const char *str, size_t *size); 193 ssize_t push_codepoint_handle(struct smb_iconv_handle *ic, 194 char *str, codepoint_t c); 195 196 codepoint_t toupper_m(codepoint_t val); 197 codepoint_t tolower_m(codepoint_t val); 198 bool islower_m(codepoint_t val); 199 bool isupper_m(codepoint_t val); 200 int codepoint_cmpi(codepoint_t c1, codepoint_t c2); 201 202 /* Iconv convenience functions */ 203 struct smb_iconv_handle *smb_iconv_handle_reinit(TALLOC_CTX *mem_ctx, 204 const char *dos_charset, 205 const char *unix_charset, 206 bool use_builtin_handlers, 207 struct smb_iconv_handle *old_ic); 208 209 bool convert_string_handle(struct smb_iconv_handle *ic, 210 charset_t from, charset_t to, 211 void const *src, size_t srclen, 212 void *dest, size_t destlen, size_t *converted_size); 213 bool convert_string_error_handle(struct smb_iconv_handle *ic, 214 charset_t from, charset_t to, 215 void const *src, size_t srclen, 216 void *dest, size_t destlen, 217 size_t *converted_size); 218 219 bool convert_string_talloc_handle(TALLOC_CTX *ctx, 220 struct smb_iconv_handle *ic, 221 charset_t from, charset_t to, 222 void const *src, size_t srclen, 223 void *dest, size_t *converted_size); 224 /* iconv */ 225 smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode); 226 int smb_iconv_close(smb_iconv_t cd); 227 size_t smb_iconv(smb_iconv_t cd, 228 const char **inbuf, size_t *inbytesleft, 229 char **outbuf, size_t *outbytesleft); 230 smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 231 const char *fromcode, bool use_builtin_handlers); 232 233 void smb_init_locale(void); 234 235 /* The following definitions come from util_unistr_w.c */ 236 237 size_t strlen_w(const smb_ucs2_t *src); 238 size_t strnlen_w(const smb_ucs2_t *src, size_t max); 239 smb_ucs2_t *strchr_w(const smb_ucs2_t *s, smb_ucs2_t c); 240 smb_ucs2_t *strchr_wa(const smb_ucs2_t *s, char c); 241 smb_ucs2_t *strrchr_w(const smb_ucs2_t *s, smb_ucs2_t c); 242 smb_ucs2_t *strnrchr_w(const smb_ucs2_t *s, smb_ucs2_t c, unsigned int n); 243 smb_ucs2_t *strstr_w(const smb_ucs2_t *s, const smb_ucs2_t *ins); 244 bool strlower_w(smb_ucs2_t *s); 245 bool strupper_w(smb_ucs2_t *s); 246 int strcasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b); 247 int strncasecmp_w(const smb_ucs2_t *a, const smb_ucs2_t *b, size_t len); 248 int strcmp_wa(const smb_ucs2_t *a, const char *b); 249 smb_ucs2_t toupper_w(smb_ucs2_t v); 250 251 #endif /* __CHARSET_H__ */ 252