1 /***************************************************************************
2  *   copyright           : (C) 2002 by Hendrik Sattler                     *
3  *   mail                : post@hendrik-sattler.de                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  ***************************************************************************/
11 
12 #ifndef CHARSETS_H
13 #define CHARSETS_H
14 
15 /** You MUST call this function before all other charset functions.
16  *  @param charset if NULL, the locale-specific charset will be returned
17  *                 by get_system_charset().
18  *  @param announce if non-zero, prints an informational message to stderr
    *
    *  NOTE(review): what a non-NULL charset does, and whether the string is
    *  copied or retained, is not visible from this header -- confirm against
    *  the implementation before freeing the argument.
19  */
20 void charset_init (char* charset, int announce);
21 
22 /** Return the system character set.
    *  get_system_charset() takes the setting from charset_init() into
    *  account; get_system_charset_raw() reports only what the system tells.
    *
    *  NOTE(review): both declarations use an empty parameter list, which is
    *  not a prototype before C23; "(void)" would let the compiler reject
    *  stray arguments.
    *  NOTE(review): ownership of the returned string is not stated here --
    *  confirm before freeing or modifying it.
23  */
24 char* get_system_charset (); /* considers setting from charset_init() */
25 char* get_system_charset_raw(); /* only what the system tells */
26 
/* Replacement mode accepted by convert_from_internal() and
 * convert_to_system(): selects how unknown characters are handled.
 */
27 enum repmode {
28   /* replace every unknown character with a \XXXX replacement;
29    * it is used for all output that we might read again
30    */
31   REPMODE_ESCAPE_CHARS = 0,
32   /* replace every unknown character with a '?';
33    * it is used for all output that we may not read again
34    */
35   REPMODE_QUESTIONMARK,
36   /* ignore any failure (an error message is printed)
37    */
38   REPMODE_IGNORE
39 };
40 
41 #include "intincl.h"
/* Element types of the strings declared in this header: fixed-width
 * unsigned integers for UCS-4, UCS-2 and GSM data.
 * These are preprocessor macros, not typedefs.
 * NOTE(review): the uintN_t types are presumably made available by
 * "intincl.h" -- confirm.
 */
42 #define ucs4char_t uint32_t
43 #define ucs2char_t uint16_t
44 #define gsmchar_t  uint8_t
45 
46 #include <stdlib.h>
47 
48 /* Return the number of elements stored in input (this is the storage
    * count; see gsmwidth() for the count of characters).
    * NOTE(review): presumably the elements in front of a terminating 0 are
    * counted, the way strlen() does -- confirm.
    * gsmlen() expands to str_len(), which is not declared in this header.
    */
49 size_t ucs4len (const ucs4char_t* input);
50 size_t ucs2len (const ucs2char_t* input);
51 #define gsmlen(input) str_len(input)
52 
53 /* Return the count of characters in a GSM string (as opposed to
    * gsmlen(), which returns the count of elements).
    * NOTE(review): the two presumably differ when a single character
    * occupies more than one gsmchar_t -- confirm.
    */
54 size_t gsmwidth (const gsmchar_t* input);
55 
56 /* Almost like strndup()/strdup() for UCS-4 and UCS-2 strings, but these
    * return NULL if s == NULL.
    * NOTE(review): by analogy with strdup() the result is presumably
    * allocated for the caller, who has to free it -- confirm.
    */
57 ucs4char_t* ucs4ndup(const ucs4char_t* s, size_t n);
58 ucs4char_t* ucs4dup(const ucs4char_t* s);
59 ucs2char_t* ucs2ndup(const ucs2char_t* s, size_t n);
60 ucs2char_t* ucs2dup(const ucs2char_t* s);
61 
62 /* Just like strncmp()/strcmp(), but comparing UCS-4 strings.
    * NOTE(review): NULL arguments are not mentioned here; treat them as
    * invalid unless the implementation says otherwise.
    */
63 int ucs4ncmp (const ucs4char_t* s1, const ucs4char_t* s2, size_t n);
64 int ucs4cmp (const ucs4char_t* s1, const ucs4char_t* s2);
65 
66 /* Just like strstr(), but searching for needle in a UCS-4 haystack.
    * As with strstr(), the returned pointer is not const although
    * haystack is.
    */
67 ucs4char_t* ucs4str (const ucs4char_t* haystack, const ucs4char_t* needle);
68 
69 /* Unlike strncpy(): copies n-1 characters from src to dest and adds a
    * terminating 0.
    * NOTE(review): presumably "at most n-1" when src is shorter, and the
    * return value is presumably dest -- confirm. The behaviour for n == 0
    * is not documented here.
    */
70 ucs4char_t* ucs4ncpy (ucs4char_t* dest, const ucs4char_t* src, size_t n);
71 
72 /* These two are mainly used by the other conversion functions below.
    * Per the signatures, the internal form is a ucs4char_t string:
    * convert_to_internal() produces one, convert_from_internal() consumes
    * one. replacement_mode is described at enum repmode.
    * NOTE(review): from_code/to_code are presumably charset names and
    * insize presumably the size of input in bytes -- confirm.
    * NOTE(review): ownership of the returned buffers is not stated here --
    * confirm who has to free them.
    */
73 ucs4char_t* convert_to_internal (const char* from_code,
74 				 char* input,
75 				 size_t insize);
76 char* convert_from_internal (const char* to_code,
77 			     ucs4char_t* input,
78 			     enum repmode replacement_mode);
79 
80 /* Convert from/to the system's charset (see get_system_charset()).
    * replacement_mode is described at enum repmode.
    * NOTE(review): ownership of the returned buffers is not stated here --
    * confirm who has to free them.
    */
81 char* convert_to_system (ucs4char_t* input,
82 			 enum repmode replacement_mode);
83 ucs4char_t* convert_from_system (char* input);
84 
85 /* Convert from/to the 7bit GSM charset.
    * NOTE(review): no replacement mode is taken here, so the handling of
    * characters without a GSM equivalent has to be checked in the
    * implementation; ownership of the result is not stated either.
    */
86 gsmchar_t* convert_to_gsm (ucs4char_t* input);
87 ucs4char_t* convert_from_gsm (gsmchar_t* input);
88 
89 /* Convert from/to the UCS-2 unicode charset;
90  * input must be in host endianness.
91  */
92 ucs2char_t* convert_to_ucs2 (ucs4char_t* input);
93 ucs4char_t* convert_from_ucs2 (ucs2char_t* input);
94 /* Hex string variants: fromhe and tohe are functions that convert a
    * 16-bit value from and to host endianness.
    * NOTE(review): the exact hex string layout (digits per character, case)
    * is not described in this header -- confirm.
    */
95 char* convert_to_ucs2_hexstring (ucs4char_t* input,
96 				 uint16_t (*fromhe)(uint16_t i));
97 ucs4char_t* convert_from_ucs2_hexstring (char* input,
98 					 uint16_t (*tohe)(uint16_t i));
99 
100 /* Count how many GSM-encoded UCS-4 characters fit into
101  * a maximum of count septets.
102  * A count of zero counts the septets needed by input instead.
     *
     * @param input the UCS-4 string to measure
     * @param count septet limit, or 0 to ask for the septets input needs
     * @return the number of characters that fit into count septets, or,
     *         for count == 0, the number of septets needed by input
103  */
104 unsigned int gsm_count (ucs4char_t* input, unsigned int count);
105 
106 #endif
107