1 /*****************************************************************************
2 * strings.h: ETSI EN 300 468 Strings
3 *****************************************************************************
4 * Copyright (C) 2009-2010 VideoLAN
5 *
6 * Authors: Christophe Massiot <massiot@via.ecp.fr>
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining
9 * a copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sublicense, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject
14 * to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be
17 * included in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *****************************************************************************/
27
28 /*
29 * Normative references:
30 * - ETSI EN 300 468 V1.11.1 (2010-04) (SI in DVB systems)
31 */
32
33 #ifndef __BITSTREAM_DVB_STRINGS_H__
34 #define __BITSTREAM_DVB_STRINGS_H__
35
36 #include <stdlib.h> /* malloc, free */
37
38 #include <bitstream/common.h>
39
40 #ifdef __cplusplus
41 extern "C"
42 {
43 #endif
44
45 /*****************************************************************************
46 * DVB string
47 *****************************************************************************/
/*
 * Charset names indexed by the first byte of a DVB string (0x00 - 0x1f).
 * Empty strings mark selector values that have no charset name in this
 * table; the list is NULL-terminated so that it can be scanned linearly.
 */
static const char *ppsz_dvb_encodings[] = {
    /* 0x00 */ "",            "ISO-8859-5",  "ISO-8859-6",  "ISO-8859-7",
    /* 0x04 */ "ISO-8859-8",  "ISO-8859-9",  "ISO-8859-10", "ISO-8859-11",
    /* 0x08 */ "",            "ISO-8859-13", "ISO-8859-14", "ISO-8859-15",
    /* 0x0c */ "",            "",            "",            "",
    /* 0x10 */ "",            "UCS-2BE",     "EUC-KR",      "GB2312",
    /* 0x14 */ "UCS-2BE",     "UTF-8",       "",            "",
    /* 0x18 */ "",            "",            "",            "",
    /* 0x1c */ "",            "",            "",            "",
    NULL
};

/*
 * Charset names indexed by the third byte of a three-byte selector
 * (0x10 0x00 0xNN). Same conventions as the table above.
 */
static const char *ppsz_dvb_encodings10[] = {
    /* 0x00 */ "",            "ISO-8859-1",  "ISO-8859-2",  "ISO-8859-3",
    /* 0x04 */ "ISO-8859-4",  "ISO-8859-5",  "ISO-8859-6",  "ISO-8859-7",
    /* 0x08 */ "ISO-8859-8",  "ISO-8859-9",  "ISO-8859-10", "ISO-8859-11",
    /* 0x0c */ "",            "ISO-8859-13", "ISO-8859-14", "ISO-8859-15",
    NULL
};

/*
 * Reads the charset selector at the start of a DVB string.
 *
 * pp_string / pi_length: in/out cursor over the string; they are moved past
 *     every selector byte that gets consumed.
 * psz_default_encoding: returned as is when the first byte is >= 0x20, i.e.
 *     when the string carries no selector (the cursor is then left alone).
 *
 * Returns NULL when the string is empty, when its first byte is 0x00 (cursor
 * untouched), when a three-byte 0x10 selector is invalid, or for the 0x1f
 * selector (whose extra byte is skipped). Otherwise returns the table entry
 * for the selector, which is "" for values without a charset name.
 */
static inline const char *dvb_string_get_encoding(const uint8_t **pp_string,
        size_t *pi_length, const char *psz_default_encoding)
{
    const uint8_t *p_cursor = *pp_string;
    size_t i_left = *pi_length;
    uint8_t i_selector;

    if (i_left == 0)
        return NULL;

    i_selector = p_cursor[0];
    if (i_selector == 0x00)
        return NULL;
    if (i_selector >= 0x20)
        return psz_default_encoding;

    /* the selector byte is consumed from here on */
    p_cursor++;
    i_left--;

    if (i_selector == 0x10 && i_left >= 2) {
        uint8_t i_high = p_cursor[0];
        uint8_t i_low = p_cursor[1];

        *pp_string = p_cursor + 2;
        *pi_length = i_left - 2;

        if (i_high == 0x00 && i_low >= 0x01 && i_low <= 0x0f)
            return ppsz_dvb_encodings10[i_low];
        return NULL;
    }

    if (i_selector == 0x1f && i_left >= 1) {
        /* no info on these encodings, skip */
        *pp_string = p_cursor + 1;
        *pi_length = i_left - 1;
        return NULL;
    }

    *pp_string = p_cursor;
    *pi_length = i_left;
    return ppsz_dvb_encodings[i_selector];
}
101
dvb_string_set_inner(const uint8_t * p_string,size_t i_length,const char * psz_encoding,size_t * pi_out_length,const char * psz_default_encoding)102 static inline uint8_t *dvb_string_set_inner(const uint8_t *p_string,
103 size_t i_length, const char *psz_encoding, size_t *pi_out_length,
104 const char *psz_default_encoding)
105 {
106 int i;
107
108 if (!strcmp(psz_encoding, psz_default_encoding)) {
109 uint8_t *p_encoded = (uint8_t *)malloc(i_length);
110 *pi_out_length = i_length;
111 memcpy(p_encoded, p_string, i_length);
112 return p_encoded;
113 }
114
115 for (i = 0; ppsz_dvb_encodings[i] != NULL; i++) {
116 if (!strcasecmp(psz_encoding, ppsz_dvb_encodings[i])) {
117 uint8_t *p_encoded = (uint8_t *)malloc(i_length + 1);
118 *pi_out_length = i_length + 1;
119 p_encoded[0] = i;
120 memcpy(p_encoded + 1, p_string, i_length);
121 return p_encoded;
122 }
123 }
124
125 for (i = 0; ppsz_dvb_encodings10[i] != NULL; i++) {
126 if (!strcasecmp(psz_encoding, ppsz_dvb_encodings10[i])) {
127 uint8_t *p_encoded = (uint8_t *)malloc(i_length + 3);
128 *pi_out_length = i_length + 3;
129 p_encoded[0] = 0x10;
130 p_encoded[1] = 0x0;
131 p_encoded[2] = i;
132 memcpy(p_encoded + 3, p_string, i_length);
133 return p_encoded;
134 }
135 }
136
137 *pi_out_length = 0;
138 return NULL;
139 }
140
/*
 * Builds a DVB string with "ISO6937" as the charset that needs no selector.
 * All other arguments and the return value are those of
 * dvb_string_set_inner(), to which the call is forwarded.
 */
static inline uint8_t *dvb_string_set(const uint8_t *p_string, size_t i_length,
                                      const char *psz_encoding,
                                      size_t *pi_out_length)
{
    const char *psz_default = "ISO6937";

    return dvb_string_set_inner(p_string, i_length, psz_encoding,
                                pi_out_length, psz_default);
}
148
/*
 * Same as dvb_string_set(), with a provider-specific default charset:
 * "ISO-8859-1" when psz_provider is exactly "CSAT", "ISO6937" for any other
 * provider or when psz_provider is NULL.
 */
static inline uint8_t *dvb_string_set_quirks(const uint8_t *p_string,
        size_t i_length, const char *psz_encoding, size_t *pi_out_length,
        const char *psz_provider)
{
    const char *psz_default = "ISO6937";

    if (psz_provider != NULL && strcmp(psz_provider, "CSAT") == 0)
        psz_default = "ISO-8859-1";

    return dvb_string_set_inner(p_string, i_length, psz_encoding,
                                pi_out_length, psz_default);
}
159
160 /* simpler API because this one doesn't output to multibyte charsets */
dvb_string_get_inner(const uint8_t * p_string,size_t i_length,f_iconv pf_iconv,void * iconv_opaque,const char * psz_default_encoding)161 static inline char *dvb_string_get_inner(const uint8_t *p_string,
162 size_t i_length, f_iconv pf_iconv, void *iconv_opaque,
163 const char *psz_default_encoding)
164 {
165 if (i_length) {
166 const char *psz_encoding =
167 dvb_string_get_encoding(&p_string, &i_length, psz_default_encoding);
168 if (psz_encoding == NULL || !i_length) {
169 /* try one-byte charset */
170 char *psz_string = (char *)malloc(i_length + 1);
171 memcpy(psz_string, p_string, i_length);
172 psz_string[i_length] = '\0';
173 return psz_string;
174 }
175
176 return pf_iconv(iconv_opaque, psz_encoding, (char *)p_string, i_length);
177 }
178
179 return strdup("");
180 }
181
/*
 * Decodes a DVB string, assuming "ISO6937" when the string carries no
 * charset selector. All other arguments and the return value are those of
 * dvb_string_get_inner(), to which the call is forwarded.
 */
static inline char *dvb_string_get(const uint8_t *p_string, size_t i_length,
                                   f_iconv pf_iconv, void *iconv_opaque)
{
    const char *psz_default = "ISO6937";

    return dvb_string_get_inner(p_string, i_length, pf_iconv, iconv_opaque,
                                psz_default);
}
188
/*
 * Same as dvb_string_get(), with a provider-specific default charset:
 * "ISO-8859-1" when psz_provider is exactly "CSAT", "ISO6937" for any other
 * provider or when psz_provider is NULL.
 */
static inline char *dvb_string_get_quirks(const uint8_t *p_string,
        size_t i_length, f_iconv pf_iconv, void *iconv_opaque,
        const char *psz_provider)
{
    const char *psz_default = "ISO6937";

    if (psz_provider != NULL && strcmp(psz_provider, "CSAT") == 0)
        psz_default = "ISO-8859-1";

    return dvb_string_get_inner(p_string, i_length, pf_iconv, iconv_opaque,
                                psz_default);
}
199
/*
 * Private helper: returns the XML entity that replaces c, or NULL when c can
 * be copied unchanged.
 */
static inline const char *dvb_string_xml_entity(char c)
{
    switch (c) {
    case '<':
        return "&lt;";
    case '>':
        return "&gt;";
    case '&':
        return "&amp;";
    case '"':
        return "&quot;";
    case '\'':
        return "&apos;";
    default:
        return NULL;
    }
}

/*
 * Returns a newly allocated copy of psz_input in which the five characters
 * < > & " ' are replaced by their predefined XML entities.
 *
 * psz_input: NUL-terminated string obtained from malloc(); this function
 *     takes ownership and free()s it, including when it fails.
 *
 * Returns the escaped string, to be released with free(), or NULL when
 * psz_input is NULL or when the allocation fails.
 */
static inline char *dvb_string_xml_escape(char *psz_input)
{
    const char *psz_in;
    char *psz_output, *psz_out;
    size_t i_output_size = 0;

    if (psz_input == NULL)
        return NULL;

    /* first pass: compute the size of the escaped string */
    for (psz_in = psz_input; *psz_in; psz_in++) {
        const char *psz_entity = dvb_string_xml_entity(*psz_in);

        i_output_size += psz_entity != NULL ? strlen(psz_entity) : 1;
    }

    /* the cast keeps this header usable from C++ translation units */
    psz_output = (char *)malloc(i_output_size + 1);
    if (psz_output == NULL) {
        free(psz_input);
        return NULL;
    }

    /* second pass: write the entities and the untouched characters */
    psz_out = psz_output;
    for (psz_in = psz_input; *psz_in; psz_in++) {
        const char *psz_entity = dvb_string_xml_entity(*psz_in);

        if (psz_entity != NULL) {
            size_t i_entity = strlen(psz_entity);

            memcpy(psz_out, psz_entity, i_entity);
            psz_out += i_entity;
        } else {
            *psz_out++ = *psz_in;
        }
    }
    *psz_out = '\0';

    free(psz_input);
    return psz_output;
}
259
260 #ifdef __cplusplus
261 }
262 #endif
263
264 #endif
265