1 /*****************************************************************************
2  * strings.h: ETSI EN 300 468 Strings
3  *****************************************************************************
4  * Copyright (C) 2009-2010 VideoLAN
5  *
6  * Authors: Christophe Massiot <massiot@via.ecp.fr>
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining
9  * a copy of this software and associated documentation files (the
10  * "Software"), to deal in the Software without restriction, including
11  * without limitation the rights to use, copy, modify, merge, publish,
12  * distribute, sublicense, and/or sell copies of the Software, and to
13  * permit persons to whom the Software is furnished to do so, subject
14  * to the following conditions:
15  *
16  * The above copyright notice and this permission notice shall be
17  * included in all copies or substantial portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *****************************************************************************/
27 
28 /*
29  * Normative references:
30  *  - ETSI EN 300 468 V1.11.1 (2010-04) (SI in DVB systems)
31  */
32 
33 #ifndef __BITSTREAM_DVB_STRINGS_H__
34 #define __BITSTREAM_DVB_STRINGS_H__
35 
36 #include <stdlib.h>   /* malloc, free */
37 
38 #include <bitstream/common.h>
39 
40 #ifdef __cplusplus
41 extern "C"
42 {
43 #endif
44 
45 /*****************************************************************************
46  * DVB string
47  *****************************************************************************/
/*
 * Charset names indexed by the selector byte (0x00 - 0x1f) that may start
 * a DVB string. An empty name marks a selector for which this table holds
 * no charset; the list is terminated by a NULL entry.
 */
static const char *ppsz_dvb_encodings[] = {
    "",             /* 0x00 */
    "ISO-8859-5",   /* 0x01 */
    "ISO-8859-6",   /* 0x02 */
    "ISO-8859-7",   /* 0x03 */
    "ISO-8859-8",   /* 0x04 */
    "ISO-8859-9",   /* 0x05 */
    "ISO-8859-10",  /* 0x06 */
    "ISO-8859-11",  /* 0x07 */
    "",             /* 0x08 */
    "ISO-8859-13",  /* 0x09 */
    "ISO-8859-14",  /* 0x0a */
    "ISO-8859-15",  /* 0x0b */
    "",             /* 0x0c */
    "",             /* 0x0d */
    "",             /* 0x0e */
    "",             /* 0x0f */

    "",             /* 0x10 */
    "UCS-2BE",      /* 0x11 */
    "EUC-KR",       /* 0x12 */
    "GB2312",       /* 0x13 */
    "UCS-2BE",      /* 0x14 */
    "UTF-8",        /* 0x15 */
    "",             /* 0x16 */
    "",             /* 0x17 */
    "",             /* 0x18 */
    "",             /* 0x19 */
    "",             /* 0x1a */
    "",             /* 0x1b */
    "",             /* 0x1c */
    "",             /* 0x1d */
    "",             /* 0x1e */
    "",             /* 0x1f */

    NULL            /* end of list */
};
/*
 * Charset names used by the three-byte selector form (0x10, 0x00, index):
 * the entry position is the value of the third byte. An empty name marks
 * an index without a charset; the list is terminated by a NULL entry.
 */
static const char *ppsz_dvb_encodings10[] = {
    "",             /* 0x00 */
    "ISO-8859-1",   /* 0x01 */
    "ISO-8859-2",   /* 0x02 */
    "ISO-8859-3",   /* 0x03 */
    "ISO-8859-4",   /* 0x04 */
    "ISO-8859-5",   /* 0x05 */
    "ISO-8859-6",   /* 0x06 */
    "ISO-8859-7",   /* 0x07 */
    "ISO-8859-8",   /* 0x08 */
    "ISO-8859-9",   /* 0x09 */
    "ISO-8859-10",  /* 0x0a */
    "ISO-8859-11",  /* 0x0b */
    "",             /* 0x0c */
    "ISO-8859-13",  /* 0x0d */
    "ISO-8859-14",  /* 0x0e */
    "ISO-8859-15",  /* 0x0f */

    NULL            /* end of list */
};
64 
dvb_string_get_encoding(const uint8_t ** pp_string,size_t * pi_length,const char * psz_default_encoding)65 static inline const char *dvb_string_get_encoding(const uint8_t **pp_string,
66         size_t *pi_length, const char *psz_default_encoding)
67 {
68     uint8_t i_first;
69 
70     if (!*pi_length)
71         return NULL;
72     i_first = (*pp_string)[0];
73 
74     if (!i_first)
75         return NULL;
76     if (i_first >= 0x20)
77         return psz_default_encoding;
78     (*pp_string)++;
79     (*pi_length)--;
80 
81     if (i_first == 0x10 && *pi_length >= 2) {
82         uint8_t i_second = (*pp_string)[0];
83         uint8_t i_third = (*pp_string)[1];
84         (*pp_string) += 2;
85         (*pi_length) -= 2;
86 
87         if (i_second != 0x0 || i_third == 0 || i_third >= 0x10)
88             return NULL;
89         return ppsz_dvb_encodings10[i_third];
90     }
91 
92     if (i_first == 0x1f && *pi_length >= 1) {
93         /* no info on these encodings, skip */
94         (*pp_string)++;
95         (*pi_length)--;
96         return NULL;
97     }
98 
99     return ppsz_dvb_encodings[i_first];
100 }
101 
dvb_string_set_inner(const uint8_t * p_string,size_t i_length,const char * psz_encoding,size_t * pi_out_length,const char * psz_default_encoding)102 static inline uint8_t *dvb_string_set_inner(const uint8_t *p_string,
103         size_t i_length, const char *psz_encoding, size_t *pi_out_length,
104         const char *psz_default_encoding)
105 {
106     int i;
107 
108     if (!strcmp(psz_encoding, psz_default_encoding)) {
109         uint8_t *p_encoded = (uint8_t *)malloc(i_length);
110         *pi_out_length = i_length;
111         memcpy(p_encoded, p_string, i_length);
112         return p_encoded;
113     }
114 
115     for (i = 0; ppsz_dvb_encodings[i] != NULL; i++) {
116         if (!strcasecmp(psz_encoding, ppsz_dvb_encodings[i])) {
117             uint8_t *p_encoded = (uint8_t *)malloc(i_length + 1);
118             *pi_out_length = i_length + 1;
119             p_encoded[0] = i;
120             memcpy(p_encoded + 1, p_string, i_length);
121             return p_encoded;
122         }
123     }
124 
125     for (i = 0; ppsz_dvb_encodings10[i] != NULL; i++) {
126         if (!strcasecmp(psz_encoding, ppsz_dvb_encodings10[i])) {
127             uint8_t *p_encoded = (uint8_t *)malloc(i_length + 3);
128             *pi_out_length = i_length + 3;
129             p_encoded[0] = 0x10;
130             p_encoded[1] = 0x0;
131             p_encoded[2] = i;
132             memcpy(p_encoded + 3, p_string, i_length);
133             return p_encoded;
134         }
135     }
136 
137     *pi_out_length = 0;
138     return NULL;
139 }
140 
/*
 * Builds a DVB string from p_string, treating "ISO6937" as the default
 * charset (the one written without a selector prefix). The work is done by
 * dvb_string_set_inner, whose result and *pi_out_length are passed through
 * unchanged.
 */
static inline uint8_t *dvb_string_set(const uint8_t *p_string, size_t i_length,
                                      const char *psz_encoding,
                                      size_t *pi_out_length)
{
    const char *psz_default_encoding = "ISO6937";
    uint8_t *p_encoded = dvb_string_set_inner(p_string, i_length,
                                              psz_encoding, pi_out_length,
                                              psz_default_encoding);

    return p_encoded;
}
148 
/*
 * Same as building a DVB string with the "ISO6937" default charset, except
 * that the default becomes "ISO-8859-1" when psz_provider is exactly
 * "CSAT". psz_provider may be NULL, in which case "ISO6937" is used.
 * The result of dvb_string_set_inner is returned unchanged.
 */
static inline uint8_t *dvb_string_set_quirks(const uint8_t *p_string,
        size_t i_length, const char *psz_encoding, size_t *pi_out_length,
        const char *psz_provider)
{
    const char *psz_default_encoding = "ISO6937";

    if (psz_provider != NULL && strcmp(psz_provider, "CSAT") == 0)
        psz_default_encoding = "ISO-8859-1";

    return dvb_string_set_inner(p_string, i_length, psz_encoding,
                                pi_out_length, psz_default_encoding);
}
159 
160 /* simpler API because this one doesn't output to multibyte charsets */
dvb_string_get_inner(const uint8_t * p_string,size_t i_length,f_iconv pf_iconv,void * iconv_opaque,const char * psz_default_encoding)161 static inline char *dvb_string_get_inner(const uint8_t *p_string,
162         size_t i_length, f_iconv pf_iconv, void *iconv_opaque,
163         const char *psz_default_encoding)
164 {
165     if (i_length) {
166         const char *psz_encoding =
167             dvb_string_get_encoding(&p_string, &i_length, psz_default_encoding);
168         if (psz_encoding == NULL || !i_length) {
169             /* try one-byte charset */
170             char *psz_string = (char *)malloc(i_length + 1);
171             memcpy(psz_string, p_string, i_length);
172             psz_string[i_length] = '\0';
173             return psz_string;
174         }
175 
176         return pf_iconv(iconv_opaque, psz_encoding, (char *)p_string, i_length);
177     }
178 
179     return strdup("");
180 }
181 
/*
 * Decodes a DVB string to a C string, assuming "ISO6937" for strings that
 * carry no charset selector. The result of dvb_string_get_inner is
 * returned unchanged.
 */
static inline char *dvb_string_get(const uint8_t *p_string, size_t i_length,
                                   f_iconv pf_iconv, void *iconv_opaque)
{
    const char *psz_default_encoding = "ISO6937";
    char *psz_string = dvb_string_get_inner(p_string, i_length, pf_iconv,
                                            iconv_opaque,
                                            psz_default_encoding);

    return psz_string;
}
188 
/*
 * Decodes a DVB string to a C string. Strings without a charset selector
 * are assumed to be "ISO6937", except when psz_provider is exactly "CSAT",
 * in which case "ISO-8859-1" is assumed. psz_provider may be NULL.
 * The result of dvb_string_get_inner is returned unchanged.
 */
static inline char *dvb_string_get_quirks(const uint8_t *p_string,
        size_t i_length, f_iconv pf_iconv, void *iconv_opaque,
        const char *psz_provider)
{
    const char *psz_default_encoding = "ISO6937";

    if (psz_provider != NULL && strcmp(psz_provider, "CSAT") == 0)
        psz_default_encoding = "ISO-8859-1";

    return dvb_string_get_inner(p_string, i_length, pf_iconv, iconv_opaque,
                                psz_default_encoding);
}
199 
/*
 * Returns the XML entity that replaces c, or NULL when c needs no escaping.
 * Single source of truth for both the sizing and the copying pass of
 * dvb_string_xml_escape.
 */
static inline const char *dvb_string_xml_entity(char c)
{
    switch (c) {
        case '<':
            return "&lt;";
        case '>':
            return "&gt;";
        case '&':
            return "&amp;";
        case '"':
            return "&quot;";
        case '\'':
            return "&apos;";
        default:
            return NULL;
    }
}

/*
 * Returns a newly malloc()ed copy of psz_input in which <, >, &, " and '
 * are replaced by &lt;, &gt;, &amp;, &quot; and &apos;.
 *
 * Ownership: psz_input must have been obtained from malloc(); it is
 * free()d by this function, including when the allocation of the result
 * fails. The caller releases the returned string with free().
 *
 * Returns NULL if psz_input is NULL or if memory cannot be allocated.
 */
static inline char *dvb_string_xml_escape(char *psz_input)
{
    const char *psz_in;
    const char *psz_entity;
    char *psz_output, *psz_out;
    size_t i_output_size = 0;

    if (psz_input == NULL)
        return NULL;

    /* first pass: size of the escaped string */
    for (psz_in = psz_input; *psz_in; psz_in++) {
        psz_entity = dvb_string_xml_entity(*psz_in);
        i_output_size += psz_entity != NULL ? strlen(psz_entity) : 1;
    }

    psz_output = (char *)malloc(i_output_size + 1);
    if (psz_output == NULL) {
        /* the input is consumed on every path so that callers doing
         * s = dvb_string_xml_escape(s) do not leak it */
        free(psz_input);
        return NULL;
    }

    /* second pass: copy, expanding entities */
    psz_out = psz_output;
    for (psz_in = psz_input; *psz_in; psz_in++) {
        psz_entity = dvb_string_xml_entity(*psz_in);
        if (psz_entity != NULL) {
            size_t i_entity_length = strlen(psz_entity);
            memcpy(psz_out, psz_entity, i_entity_length);
            psz_out += i_entity_length;
        } else {
            *psz_out++ = *psz_in;
        }
    }
    *psz_out = '\0';

    free(psz_input);
    return psz_output;
}
259 
260 #ifdef __cplusplus
261 }
262 #endif
263 
264 #endif
265