1 /*
2  * decoder for Closed Captions
3  *
4  * This decoder relies on MPlayer's OSD to display subtitles.
5  * Be warned that decoding is somewhat preliminary, though it basically works.
6  *
7  * Most notably, only the text information is decoded as of now, discarding
8  * color, background and position info (see source below).
9  *
10  * uses source from the xine closed captions decoder
11  *
12  * Copyright (C) 2002 Matteo Giani
13  *
14  * This file is part of MPlayer.
15  *
16  * MPlayer is free software; you can redistribute it and/or modify
17  * it under the terms of the GNU General Public License as published by
18  * the Free Software Foundation; either version 2 of the License, or
19  * (at your option) any later version.
20  *
21  * MPlayer is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License along
27  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
28  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
29  */
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include "config.h"
36 #include "mp_msg.h"
37 #include "sub_cc.h"
38 
39 #include "subreader.h"
40 
41 #include "libvo/video_out.h"
42 #include "sub.h"
43 
44 #include "libavutil/avutil.h"
45 #include "libavutil/common.h"
46 
47 
/* Size in bytes of every caption line buffer allocated by append_char().
 * append_char() never writes the last byte, so lines stay NUL-terminated. */
#define CC_MAX_LINE_LENGTH 64

/* Translation table from 7-bit caption character codes to output bytes,
 * filled in by build_char_table(). */
static char chartbl[128];

/* Storage for the two caption buffers that fb and bb point at. */
static subtitle buf1,buf2;
/* bb is the buffer append_char() writes into; fb is the buffer passed to
 * display_buffer() after swap_buffers(). */
static subtitle *fb,*bb;

/* Write position inside the last line of bb. */
static unsigned int cursor_pos=0;

/* Set to 1 by subcc_init(). */
static int initialized=0;
/* Set by subcc_process_data() once a 2-byte packet has been seen; cleared by
 * an odd-length packet, by subcc_init() and by subcc_reset(). */
static int wtv_format;

/* Caption modes selected by control codes in cc_decode_EIA608(). */
#define CC_ROLLON 1
#define CC_ROLLUP 2

/* Current caption mode, CC_ROLLON or CC_ROLLUP. */
static int cc_mode=CC_ROLLON;
static int cc_lines=4; ///< number of visible rows in CC roll-up mode, not used in CC roll-on mode
65 
build_char_table(void)66 static void build_char_table(void)
67 {
68   int i;
69   /* first the normal ASCII codes */
70   for (i = 0; i < 128; i++)
71     chartbl[i] = (char) i;
72   /* now the special codes */
73   chartbl[0x2a] = 0xe1; /* Latin Small Letter A with acute */
74   chartbl[0x5c] = 0xe9; /* Latin Small Letter E with acute */
75   chartbl[0x5e] = 0xed; /* Latin Small Letter I with acute */
76   chartbl[0x5f] = 0xf3; /* Latin Small Letter O with acute */
77   chartbl[0x60] = 0xfa; /* Latin Small Letter U with acute */
78   chartbl[0x7b] = 0xe7; /* Latin Small Letter C with cedilla */
79   chartbl[0x7c] = 0xf7; /* Division sign */
80   chartbl[0x7d] = 0xd1; /* Latin Capital letter N with tilde */
81   chartbl[0x7e] = 0xf1; /* Latin Small Letter N with tilde */
82   chartbl[0x7f] = 0xa4; /* Currency sign FIXME: this should be a solid block */
83 }
84 
/**
 * Free every text line of a caption buffer and mark the buffer empty.
 * \param buf buffer to clear
 */
static void clear_buffer(subtitle *buf)
{
	int idx = SUB_MAX_TEXT;

	/* all slots are released, not just the first buf->lines ones */
	while (idx-- > 0) {
		free(buf->text[idx]);
		buf->text[idx] = NULL;
	}
	buf->lines = 0;
}
94 
95 
96 /**
97  \brief scroll buffer one line up
98  \param buf buffer to scroll
99 */
scroll_buffer(subtitle * buf)100 static void scroll_buffer(subtitle* buf)
101 {
102 	int i;
103 
104 	while(buf->lines > cc_lines)
105 	{
106 		free(buf->text[0]);
107 
108 		for(i = 0; i < buf->lines - 1; i++) buf->text[i] = buf->text[i+1];
109 
110 		buf->text[buf->lines-1] = NULL;
111 		buf->lines--;
112 	}
113 }
114 
/* Channel bit (0 or 1) of the most recent control code seen by
 * cc_decode_EIA608(); set to -1 by subcc_init(). */
static int channel;
116 
subcc_init(void)117 void subcc_init(void)
118 {
119 	int i;
120 	//printf("subcc_init(): initing...\n");
121 	build_char_table();
122 	for(i=0;i<SUB_MAX_TEXT;i++) {buf1.text[i]=buf2.text[i]=NULL;}
123 	buf1.lines=buf2.lines=0;
124 	fb=&buf1;
125 	bb=&buf2;
126 	channel = -1;
127 
128 	initialized=1;
129 	wtv_format = 0;
130 }
131 
subcc_reset(void)132 void subcc_reset(void)
133 {
134     wtv_format = 0;
135     if (!initialized)
136         return;
137     clear_buffer(&buf1);
138     clear_buffer(&buf2);
139 }
140 
/**
 * Make a caption buffer the current OSD subtitle and flag the OSD as changed.
 * \param buf buffer to show; cc_decode_EIA608() passes NULL when it erases
 *            the displayed captions
 */
static void display_buffer(subtitle *buf)
{
	vo_sub = buf;
	vo_osd_changed(OSDTYPE_SUBTITLE);
}
146 
147 
append_char(char c)148 static void append_char(char c)
149 {
150 	if(!bb->lines) {bb->lines++; cursor_pos=0;}
151 	if(bb->text[bb->lines - 1]==NULL)
152 	{
153 		bb->text[bb->lines - 1] = calloc(1, CC_MAX_LINE_LENGTH);
154 		cursor_pos=0;
155 	}
156 
157 	if(c=='\n')
158 	{
159 		if(cursor_pos>0 && bb->lines < SUB_MAX_TEXT)
160 		{
161 			bb->lines++;cursor_pos=0;
162 			if(cc_mode==CC_ROLLUP){ //Carriage return - scroll buffer one line up
163 				bb->text[bb->lines - 1]=calloc(1, CC_MAX_LINE_LENGTH);
164 				scroll_buffer(bb);
165 			}
166 		}
167 	}
168 	else
169 	{
170 		if(cursor_pos==CC_MAX_LINE_LENGTH-1)
171 		{
172 			fprintf(stderr,"CC: append_char() reached CC_MAX_LINE_LENGTH!\n");
173 			return;
174 		}
175 		bb->text[bb->lines - 1][cursor_pos++]=c;
176 	}
177 	//In CC roll-up mode data should be shown immediately
178 	if(cc_mode==CC_ROLLUP) display_buffer(bb);
179 }
180 
181 
swap_buffers(void)182 static void swap_buffers(void)
183 {
184 	subtitle *foo;
185 	foo=fb;
186 	fb=bb;
187 	bb=foo;
188 }
189 
selected_channel(void)190 static int selected_channel(void)
191 {
192     return subcc_enabled - 1;
193 }
194 
cc_decode_EIA608(unsigned short int data)195 static void cc_decode_EIA608(unsigned short int data)
196 {
197 
198   static unsigned short int lastcode=0x0000;
199   uint8_t c1 = data & 0x7f;
200   uint8_t c2 = (data >> 8) & 0x7f;
201 
202   if (c1 & 0x60) {		/* normal character, 0x20 <= c1 <= 0x7f */
203 	   if (channel != (selected_channel() & 1))
204 		   return;
205 	   append_char(chartbl[c1]);
206 	   if(c2 & 0x60)	/*c2 might not be a normal char even if c1 is*/
207 		   append_char(chartbl[c2]);
208   }
209   else if (c1 & 0x10)		// control code / special char
210   {
211 	  channel = (c1 & 0x08) >> 3;
212 	  if (channel != (selected_channel() & 1))
213 		return;
214 	  c1&=~0x08;
215 	  if(data!=lastcode)
216 	  {
217 	  	if(c2 & 0x40) {	/*PAC, Preamble Address Code */
218 			append_char('\n'); /*FIXME properly interpret PACs*/
219 		}
220 		else
221 			switch(c1)
222 			{
223 				case 0x10:	break; // ext attribute
224 				case 0x11:
225 					if((c2 & 0x30)==0x30)
226 					{
227 						//printf("[debug]:Special char (ignored)\n");
228 						/*cc_decode_special_char()*/;
229 					}
230 					else if (c2 & 0x20)
231 					{
232 						//printf("[debug]: midrow_attr (ignored)\n");
233 						/*cc_decode_midrow_attr()*/;
234 					}
235 					break;
236 				case 0x14:
237 					switch(c2)
238 					{
239 						case 0x00: //CC roll-on mode
240 							   cc_mode=CC_ROLLON;
241 							   break;
242 						case 0x25: //CC roll-up, 2 rows
243 						case 0x26: //CC roll-up, 3 rows
244 						case 0x27: //CC roll-up, 4 rows
245 							   cc_lines=c2-0x23;
246 							   cc_mode=CC_ROLLUP;
247 							   break;
248 						case 0x2C: display_buffer(NULL); //EDM
249 							   clear_buffer(fb); break;
250 						case 0x2d: append_char('\n');	//carriage return
251 							   break;
252 						case 0x2e: clear_buffer(bb);	//ENM
253 							   break;
254 						case 0x2f: swap_buffers();	//Swap buffers
255 							   display_buffer(fb);
256 							   clear_buffer(bb);
257 							   break;
258 					}
259 					break;
260 				case 0x17:
261 					if( c2>=0x21 && c2<=0x23) //TAB
262 					{
263 						break;
264 					}
265 			}
266 	  }
267   }
268   lastcode=data;
269 }
270 
/**
 * Decode a packet of 3-byte closed caption triplets (code byte followed by
 * two data bytes) and feed the selected EIA-608 byte pairs to
 * cc_decode_EIA608().
 *
 * A trailing fragment of fewer than 3 bytes is ignored, so nothing past
 * inputbuffer[inputlength - 1] is ever read.
 *
 * \param inputbuffer packet data
 * \param inputlength number of bytes in inputbuffer
 */
static void subcc_decode(const uint8_t *inputbuffer, unsigned int inputlength)
{
  /* The first number may denote a channel number. I don't have the
   * EIA-708 standard, so it is hard to say.
   * From what I could figure out so far, the general format seems to be:
   *
   * repeat
   *
   *   0xfe starts 2 byte sequence of unknown purpose. It might denote
   *        field #2 in line 21 of the VBI.
   *        Treating it identically to 0xff fixes
   *        http://samples.mplayerhq.hu/MPEG-VOB/ClosedCaptions/Starship_Troopers.vob
   *
   *   0xff starts 2 byte EIA-608 sequence, field #1 in line 21 of the VBI.
   *        Followed by a 3-code triplet that starts either with 0xff or
   *        0xfe. In either case, the following triplet needs to be ignored
   *        for line 21, field 1.
   *
   *   0x00 is padding, followed by 2 more 0x00.
   *
   *   0x01 always seems to appear at the beginning, always seems to
   *        be followed by 0xf8, 8-bit number.
   *        The lower 7 bits of this 8-bit number seem to denote the
   *        number of code triplets that follow.
   *        The most significant bit denotes whether the Line 21 field 1
   *        captioning information is at odd or even triplet offsets from this
   *        beginning triplet. 1 denotes odd offsets, 0 denotes even offsets.
   *
   *        Most captions are encoded with odd offsets, so this is what we
   *        will assume.
   *
   * until end of packet
   */
  const uint8_t *current = inputbuffer;
  unsigned int curbytes = 0;
  uint8_t data1, data2;
  uint8_t cc_code;
  int odd_offset = 1;

  while (curbytes < inputlength) {
    /* a full triplet is read below, so all three bytes must be present */
    if (inputlength - curbytes < 3) {
#ifdef LOG_DEBUG
      fprintf(stderr, "Not enough data for 2-byte CC encoding\n");
#endif
      break;
    }

    cc_code = current[0];
    data1 = current[1];
    data2 = current[2];
    current += 3; curbytes += 3;

    // 0xfe/0xff are both used on plain EIA-608 CC and
    // for extended EIA-708 (where 0xfc/0xfd is used for
    // compatibility layer).
    // Bit 2 of the selected channel chooses which pair of codes is
    // looked at: 0xfe/0xff when it is clear, 0xfc/0xfd when it is set.
    switch (cc_code) {
    case 0xfc:
    case 0xfd:
    case 0xfe:
    case 0xff:
      if ((cc_code & 2) == (selected_channel() & 4) >> 1)
          break;
      // Triplets alternate between the two offsets; bit 1 of the
      // selected channel picks the offset that gets decoded.
      odd_offset ^= 1;
      if (odd_offset != (selected_channel() & 2) >> 1)
          break;
      /* expect EIA-608 CC1/CC2 encoding */
      // FIXME check parity!
      // Parity check omitted assuming we are reading from a DVD and therefore
      // we should encounter no "transmission errors".
      cc_decode_EIA608(data1 | (data2 << 8));
      break;

    case 0xfa:
    case 0x00:
      /* This seems to be just padding */
      break;

    case 0x01:
      /* header triplet: top bit of the count byte gives the offset parity */
      odd_offset = data2 >> 7;
      break;

    default:
//#ifdef LOG_DEBUG
      fprintf(stderr, "Unknown CC encoding: %x\n", cc_code);
//#endif
      break;
    }
  }
}
363 
364 static const uint8_t mov_cc_signature_1[] = {0, 0, 0, 0xa, 'c', 'd', 'a', 't'};
365 static const uint8_t mov_cc_signature_2[] = {0, 0, 0, 0xa, 'c', 'd', 't', '2'};
366 /**
367  * MOV uses a vastly more verbose representation for EIA 608 CC data than DVDs.
368  * This function handles that case.
369  */
mov_subcc_decode(const uint8_t * data,unsigned len)370 static void mov_subcc_decode(const uint8_t *data, unsigned len)
371 {
372     while (len >= 10) {
373         int channel = -1;
374         if (memcmp(data, mov_cc_signature_1, sizeof(mov_cc_signature_1)) == 0) {
375             channel = 0;
376         } else if (memcmp(data, mov_cc_signature_2, sizeof(mov_cc_signature_2)) == 0) {
377             channel = 1;
378         } else {
379             mp_msg(MSGT_OSD, MSGL_V, "Unknown MOV 608 CC formatting\n");
380             data++;
381             len--;
382             continue;
383         }
384         if (channel == selected_channel() >> 1)
385             cc_decode_EIA608(data[8] | (data[9] << 8));
386         data += 10;
387         len -= 10;
388     }
389 }
390 
subcc_process_data(const uint8_t * inputdata,unsigned int len)391 void subcc_process_data(const uint8_t *inputdata, unsigned int len)
392 {
393 	int mov_mode = len >= 10 &&
394 	               memcmp(inputdata, mov_cc_signature_1, sizeof(mov_cc_signature_1)) == 0;
395 	if(!subcc_enabled) return;
396 	if(!initialized) subcc_init();
397 
398 	if (mov_mode) {
399 		mov_subcc_decode(inputdata, len);
400 		return;
401 	}
402 	if (len & 1) wtv_format = 0;
403 	if (len == 2) {
404 		// EIA-608 compatibility part.
405 		// Full EIA-708 parts have length >= 4 (multiple of 2).
406 		cc_decode_EIA608(inputdata[0] | (inputdata[1] << 8));
407 		wtv_format = 1;
408 	}
409 	if (wtv_format)
410 		return;
411 	subcc_decode(inputdata, len);
412 }
413 
414 /**
415  * This processes CC captions in the format as found in ATSC broadcasts.
416  * Like DVD CC it is stored inside the MPEG-frame userdata, but with two
417  * differences:
418  * 1) It starts with "GA" instead of "CC"
419  * 2) It _must_ be reordered in the way the decoder reorders the video frames
420  * The latter makes things difficult and is the reason why there is no support
421  * for this yet beyond this function.
422  */
subcc_process_eia708(const uint8_t * data,int len)423 void subcc_process_eia708(const uint8_t *data, int len)
424 {
425     int cc_count;
426     if (!subcc_enabled)
427         return;
428     if (!initialized)
429         subcc_init();
430     if (len <= 5)
431         return;
432     if (data[0] != '9' || data[1] != '4' || data[2] != 3) {
433         mp_msg(MSGT_OSD, MSGL_ERR, "Unknown ATSC CC type "
434                                    "0x%"PRIx8" 0x%"PRIx8" 0x%"PRIx8"\n",
435                                    data[0], data[1], data[2]);
436         return;
437     }
438     // process_cc_data_flag
439     if (!(data[3] & 0x40))
440         return;
441     cc_count = data[3] & 0x1f;
442     data += 5;
443     len  -= 5;
444     cc_count = FFMIN(cc_count, len / 3);
445     while (cc_count--) {
446         // EAI-608 data
447         if ((data[0] & 0xfe) == 0xfc && (data[0] & 1) == selected_channel() >> 1)
448             cc_decode_EIA608(data[1] | (data[2] << 8));
449         data += 3;
450     }
451 }
452