1 /*
2  * straightforward (to be) optimized JPEG encoder for the YUV422 format
3  * based on MJPEG code from FFmpeg
4  *
5  * For an excellent introduction to the JPEG format, see:
6  * http://www.ece.purdue.edu/~bouman/grad-labs/lab8/pdf/lab.pdf
7  *
8  * Copyright (c) 2002, Rik Snel
9  * parts from FFmpeg Copyright (c) 2000-2002 Fabrice Bellard
10  *
11  * This file is part of MPlayer.
12  *
13  * MPlayer is free software; you can redistribute it and/or modify
14  * it under the terms of the GNU General Public License as published by
15  * the Free Software Foundation; either version 2 of the License, or
16  * (at your option) any later version.
17  *
18  * MPlayer is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21  * GNU General Public License for more details.
22  *
23  * You should have received a copy of the GNU General Public License along
24  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
25  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26  */
27 
28 
29 
30 #include <sys/types.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include "config.h"
35 #include "mp_msg.h"
36 /* We need this #define because we need ../libavcodec/common.h to #define
37  * be2me_32, otherwise the linker will complain that it doesn't exist */
38 #define HAVE_AV_CONFIG_H
39 #include "libavcodec/avcodec.h"
40 #include "libavcodec/dsputil.h"
41 #include "libavcodec/mpegvideo.h"
42 #include "libavcodec/mjpegenc.h"
43 
44 #include "av_helpers.h"
45 #include "jpeg_enc.h"
46 
47 
48 /* Begin excessive code duplication ************************************/
49 /* Code coming from mpegvideo.c and mjpeg.c in ../libavcodec ***********/
50 
/* Per-coefficient scale factors for an 8x8 block, used by convert_matrix():
 * when the selected forward DCT is ff_fdct_ifast, each entry is multiplied
 * into the quantiser divisor for the coefficient with the same index.
 * Entry 0 equals 1 << 14, i.e. a factor of 1.0 in this fixed-point format. */
static const unsigned short aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
62 
/**
 * Precompute fixed-point reciprocals of the quantisation matrix.
 *
 * For every quantiser scale in [qmin, qmax] one row of qmat (and, for the
 * generic path, of qmat16) is filled.  Which formula is used depends on the
 * forward DCT that s->dsp.fdct points to.
 *
 * @param s            encoder context; only s->dsp.fdct and
 *                     s->dsp.idct_permutation are read
 * @param qmat         output, indexed [qscale][coefficient]
 * @param qmat16       output, indexed [qscale][0 = reciprocal, 1 = bias][coefficient];
 *                     written only when neither of the two special DCTs is in use
 * @param quant_matrix quantisation matrix; it is read through the IDCT
 *                     permutation.  Entries must be non-zero, they are
 *                     used as divisors.
 * @param bias         quantiser bias, scaled by 1 << QUANT_BIAS_SHIFT
 * @param qmin         first quantiser scale to generate
 * @param qmax         last quantiser scale to generate (inclusive)
 */
static void convert_matrix(MpegEncContext *s, int (*qmat)[64],
		uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix,
		int bias, int qmin, int qmax)
{
    int q;

    for (q = qmin; q <= qmax; q++) {
        int idx;

        if (s->dsp.fdct == ff_jpeg_fdct_islow_8) {
            /* divisor is qscale * matrix entry; the numerator carries
             * three bits less than QMAT_SHIFT for this DCT */
            for (idx = 0; idx < 64; idx++) {
                const int perm = s->dsp.idct_permutation[idx];
                const int den  = q * quant_matrix[perm];

                qmat[q][idx] = (int)((UINT64_C(1) << (QMAT_SHIFT-3)) / den);
            }
        } else if (s->dsp.fdct == ff_fdct_ifast) {
            /* the aanscales[] factor (14 fractional bits) is folded into
             * the divisor, the numerator is widened to compensate */
            for (idx = 0; idx < 64; idx++) {
                const int perm = s->dsp.idct_permutation[idx];
                const int den  = aanscales[idx] * q * quant_matrix[perm];

                qmat[q][idx] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / den);
            }
        } else {
            /* generic path: additionally fill the 16 bit tables */
            for (idx = 0; idx < 64; idx++) {
                const int perm = s->dsp.idct_permutation[idx];
                const int den  = q * quant_matrix[perm];
                uint16_t *recip = &qmat16[q][0][idx];

                qmat[q][idx] = (int)((UINT64_C(1) << QMAT_SHIFT_MMX) / den);
                *recip       = (1 << QMAT_SHIFT_MMX) / den;

                /* a reciprocal of 0 or 128*256 is replaced by 128*256-1 */
                if (*recip == 0 || *recip == 128*256)
                    *recip = 128*256-1;

                qmat16[q][1][idx] = ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), *recip);
            }
        }
    }
}
112 
/**
 * Write one DC difference to the bitstream.
 *
 * The Huffman symbol is the number of bits needed for |val|; it is followed
 * by that many raw bits (val itself for positive values, val - 1 for
 * negative ones, masked to the symbol's width).  A zero difference emits
 * only the symbol for category 0.
 *
 * @param s         encoder context, s->pb receives the bits
 * @param val       DC difference to encode
 * @param huff_size code lengths, indexed by bit category
 * @param huff_code code words, indexed by bit category
 */
static inline void encode_dc(MpegEncContext *s, int val,
                             uint8_t *huff_size, uint16_t *huff_code)
{
    int magnitude, payload, category;

    if (val == 0) {
        put_bits(&s->pb, huff_size[0], huff_code[0]);
        return;
    }

    magnitude = val;
    payload   = val;
    if (val < 0) {
        magnitude = -val;
        payload   = val - 1;
    }

    /* category = position of the highest set bit of |val|, counted from 1 */
    for (category = 0; magnitude != 0; magnitude >>= 1)
        category++;

    put_bits(&s->pb, huff_size[category], huff_code[category]);
    put_bits(&s->pb, category, payload & ((1 << category) - 1));
}
139 
/**
 * Huffman-encode one quantised 8x8 block into s->pb.
 *
 * @param s     encoder context; s->last_dc[] is updated and
 *              s->block_last_index[n] is read
 * @param block the 64 quantised coefficients
 * @param n     block number: values below 4 select the luminance tables
 *              and DC predictor 0, any other value selects the chrominance
 *              tables and DC predictor n - 3
 */
static void encode_block(MpegEncContext *s, int16_t *block, int n)
{
    MJpegContext *m = s->mjpeg_ctx;
    const int is_luma   = n < 4;
    const int component = is_luma ? 0 : n - 3;
    uint8_t  *dc_size, *ac_size;
    uint16_t *dc_code, *ac_code;
    int dc, pos, zeros, last_index;

    if (is_luma) {
        dc_size = m->huff_size_dc_luminance;
        dc_code = m->huff_code_dc_luminance;
        ac_size = m->huff_size_ac_luminance;
        ac_code = m->huff_code_ac_luminance;
    } else {
        dc_size = m->huff_size_dc_chrominance;
        dc_code = m->huff_code_dc_chrominance;
        ac_size = m->huff_size_ac_chrominance;
        ac_code = m->huff_code_ac_chrominance;
    }

    /* DC: coded as the difference to the previous block of this component */
    dc = block[0]; /* overflow is impossible */
    encode_dc(s, dc - s->last_dc[component], dc_size, dc_code);
    s->last_dc[component] = dc;

    /* AC: (zero run, bit category) symbols followed by the raw bits */
    zeros = 0;
    last_index = s->block_last_index[n];
    for (pos = 1; pos <= last_index; pos++) {
        int coef = block[s->intra_scantable.permutated[pos]];
        int payload, bits, symbol;

        if (coef == 0) {
            zeros++;
            continue;
        }

        /* symbol 0xf0 stands for a run of 16 zeros */
        for (; zeros >= 16; zeros -= 16)
            put_bits(&s->pb, ac_size[0xf0], ac_code[0xf0]);

        payload = coef;
        if (coef < 0) {
            coef = -coef;
            payload--;
        }

        for (bits = 0; coef != 0; coef >>= 1)
            bits++;

        symbol = (zeros << 4) | bits;
        put_bits(&s->pb, ac_size[symbol], ac_code[symbol]);
        put_bits(&s->pb, bits, payload & ((1 << bits) - 1));
        zeros = 0;
    }

    /* end-of-block marker, omitted only when all 64 values were written */
    if (last_index < 63 || zeros != 0)
        put_bits(&s->pb, ac_size[0], ac_code[0]);
}
202 
/**
 * Clamp quantised coefficients to [s->min_qcoeff, s->max_qcoeff].
 *
 * Coefficients are visited in scan order (s->intra_scantable.permutated)
 * from position 0 up to and including last_index.
 *
 * @param s          encoder context providing the limits and the scan table
 * @param block      coefficients, modified in place
 * @param last_index last scan position to process
 */
static inline void clip_coeffs(MpegEncContext *s, int16_t *block, int last_index)
{
    const int hi = s->max_qcoeff;
    const int lo = s->min_qcoeff;
    int pos;

    for (pos = 0; pos <= last_index; pos++) {
        int16_t *coef = &block[s->intra_scantable.permutated[pos]];
        const int v = *coef;

        *coef = v > hi ? hi : (v < lo ? lo : v);
    }
}
218 
219 /* End excessive code duplication **************************************/
220 
221 /* this function is a reproduction of the one in mjpeg, it includes two
222  * changes, it allows for black&white encoding (it skips the U and V
223  * macroblocks and it outputs the huffman code for 'no change' (dc) and
224  * 'all zero' (ac)) and it takes 4 macroblocks (422) instead of 6 (420) */
/* Entropy-code the blocks of one macroblock: block[0] and block[1] as
 * luminance, then either block[2]/block[3] as chrominance or, in black &
 * white mode (j->bw), two empty chrominance blocks. */
static void zr_mjpeg_encode_mb(jpeg_enc_t *j) {
	MpegEncContext *s = j->s;
	MJpegContext *m = s->mjpeg_ctx;
	int plane;

	encode_block(s, s->block[0], 0);
	encode_block(s, s->block[1], 1);

	if (!j->bw) {
		/* block numbers 4 and 5 make encode_block pick the
		 * chrominance huffman tables instead of the luminance ones */
		encode_block(s, s->block[2], 4);
		encode_block(s, s->block[3], 5);
		return;
	}

	/* black & white: for U and then V emit the DC code for index 0
	 * followed by the AC code for index 0 */
	for (plane = 0; plane < 2; plane++) {
		put_bits(&s->pb, m->huff_size_dc_chrominance[0],
				m->huff_code_dc_chrominance[0]);
		put_bits(&s->pb, m->huff_size_ac_chrominance[0],
				m->huff_code_ac_chrominance[0]);
	}
}
250 
/* this function can take all kinds of YUV colorspaces
 * YV12, YVYU, UYVY. The necessary parameters must be set up by the caller
 * y_ps means "y pixel size", y_rs means "y row size".
 * For YUYV, for example, u_buf = y_buf + 1, v_buf = y_buf + 3,
 * y_ps = 2, u_ps = 4, v_ps = 4, y_rs = u_rs = v_rs.
 *
 *  The actual buffers must be passed with jpeg_enc_frame, this is
 *  to make it possible to call encode on the buffer provided by the
 *  codec in draw_frame.
 *
 * The data is straightened out at the moment it is put in DCT
 * blocks, there are therefore no spurious memcopies involved */
263 /* Notice that w must be a multiple of 16 and h must be a multiple of 8 */
264 /* We produce YUV422 jpegs, the colors must be subsampled horizontally,
265  * if the colors are also subsampled vertically, then this function
266  * performs cheap upsampling (better solution will be: a DCT that is
267  * optimized in the case that every two rows are the same) */
268 /* cu = 0 means 'No cheap upsampling'
269  * cu = 1 means 'perform cheap upsampling' */
/* The encoder doesn't know anything about interlacing, the half height
 * needs to be passed and the double rowstride. Which field gets encoded
 * is decided by what buffers are passed to jpeg_enc_frame */
jpeg_enc_init(int w,int h,int y_psize,int y_rsize,int u_psize,int u_rsize,int v_psize,int v_rsize,int cu,int q,int b)273 jpeg_enc_t *jpeg_enc_init(int w, int h, int y_psize, int y_rsize,
274 		int u_psize, int u_rsize, int v_psize, int v_rsize,
275 		int cu, int q, int b) {
276 	jpeg_enc_t *j;
277 	int i = 0;
278 	mp_msg(MSGT_VO, MSGL_V, "JPEnc init: %dx%d %d %d %d %d %d %d\n",
279 			w, h, y_psize, y_rsize, u_psize,
280 			u_rsize, v_psize, v_rsize);
281 
282 	j = av_malloc(sizeof(jpeg_enc_t));
283 	if (j == NULL) return NULL;
284 
285 	j->s = av_malloc(sizeof(MpegEncContext));
286 	memset(j->s,0x00,sizeof(MpegEncContext));
287 	if (j->s == NULL) {
288 		av_free(j);
289 		return NULL;
290 	}
291 
292 	/* info on how to access the pixels */
293 	j->y_ps = y_psize;
294 	j->u_ps = u_psize;
295 	j->v_ps = v_psize;
296 	j->y_rs = y_rsize;
297 	j->u_rs = u_rsize;
298 	j->v_rs = v_rsize;
299 
300 	j->s->width = w;
301 	j->s->height = h;
302 	j->s->qscale = q;
303 
304 	j->s->out_format = FMT_MJPEG;
305 	j->s->intra_only = 1;
306 	j->s->encoding = 1;
307 	j->s->pict_type = AV_PICTURE_TYPE_I;
308 	j->s->y_dc_scale = 8;
309 	j->s->c_dc_scale = 8;
310 
311 	//FIXME j->s->mjpeg_write_tables = 1;
312 	j->s->mjpeg_vsample[0] = 1;
313 	j->s->mjpeg_vsample[1] = 1;
314 	j->s->mjpeg_vsample[2] = 1;
315 	j->s->mjpeg_hsample[0] = 2;
316 	j->s->mjpeg_hsample[1] = 1;
317 	j->s->mjpeg_hsample[2] = 1;
318 
319 	j->cheap_upsample = cu;
320 	j->bw = b;
321 
322 	init_avcodec();
323 
324 	if (ff_mjpeg_encode_init(j->s) < 0) {
325 		av_free(j->s);
326 		av_free(j);
327 		return NULL;
328 	}
329 
330 	/* alloc bogus avctx to keep MPV_common_init from segfaulting */
331 	j->s->avctx = calloc(sizeof(*j->s->avctx), 1);
332 	/* Set up to encode mjpeg */
333 	j->s->avctx->codec_id = AV_CODEC_ID_MJPEG;
334 
335 	/* make MPV_common_init allocate important buffers, like s->block */
336 	j->s->avctx->thread_count = 1;
337 
338 	if (ff_MPV_common_init(j->s) < 0) {
339 		av_free(j->s);
340 		av_free(j);
341 		return NULL;
342 	}
343 
344 	/* correct the value for sc->mb_height */
345 	j->s->mb_height = j->s->height/8;
346 	j->s->mb_intra = 1;
347 
348 	j->s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
349 	for (i = 1; i < 64; i++)
350 		j->s->intra_matrix[i] = av_clip_uint8(
351 			(ff_mpeg1_default_intra_matrix[i]*j->s->qscale) >> 3);
352 	convert_matrix(j->s, j->s->q_intra_matrix, j->s->q_intra_matrix16,
353 			j->s->intra_matrix, j->s->intra_quant_bias, 8, 8);
354 	return j;
355 }
356 
jpeg_enc_frame(jpeg_enc_t * j,unsigned char * y_data,unsigned char * u_data,unsigned char * v_data,char * bufr)357 int jpeg_enc_frame(jpeg_enc_t *j, unsigned char *y_data,
358 		unsigned char *u_data, unsigned char *v_data, char *bufr) {
359 	int i, k, mb_x, mb_y, overflow;
360 	short int *dest;
361 	unsigned char *source;
362 	/* initialize the buffer */
363 
364 	init_put_bits(&j->s->pb, bufr, 1024*256);
365 
366 	ff_mjpeg_encode_picture_header(j->s);
367 
368 	j->s->header_bits = put_bits_count(&j->s->pb);
369 
370 	j->s->last_dc[0] = 128;
371 	j->s->last_dc[1] = 128;
372 	j->s->last_dc[2] = 128;
373 
374 	for (mb_y = 0; mb_y < j->s->mb_height; mb_y++) {
375 		for (mb_x = 0; mb_x < j->s->mb_width; mb_x++) {
376 			/* conversion 8 to 16 bit and filling of blocks
377 			 * must be mmx optimized */
378 			/* fill 2 Y macroblocks and one U and one V */
379 			source = mb_y * 8 * j->y_rs +
380 				16 * j->y_ps * mb_x + y_data;
381 			dest = j->s->block[0];
382 			for (i = 0; i < 8; i++) {
383 				for (k = 0; k < 8; k++) {
384 					dest[k] = source[k*j->y_ps];
385 				}
386 				dest += 8;
387 				source += j->y_rs;
388 			}
389 			source = mb_y * 8 * j->y_rs +
390 				(16*mb_x + 8)*j->y_ps + y_data;
391 			dest = j->s->block[1];
392 			for (i = 0; i < 8; i++) {
393 				for (k = 0; k < 8; k++) {
394 					dest[k] = source[k*j->y_ps];
395 				}
396 				dest += 8;
397 				source += j->y_rs;
398 			}
399 			if (!j->bw && j->cheap_upsample) {
400 				source = mb_y*4*j->u_rs +
401 					8*mb_x*j->u_ps + u_data;
402 				dest = j->s->block[2];
403 				for (i = 0; i < 4; i++) {
404 					for (k = 0; k < 8; k++) {
405 						dest[k] = source[k*j->u_ps];
406 						dest[k+8] = source[k*j->u_ps];
407 					}
408 					dest += 16;
409 					source += j->u_rs;
410 				}
411 				source = mb_y*4*j->v_rs +
412 					8*mb_x*j->v_ps + v_data;
413 				dest = j->s->block[3];
414 				for (i = 0; i < 4; i++) {
415 					for (k = 0; k < 8; k++) {
416 						dest[k] = source[k*j->v_ps];
417 						dest[k+8] = source[k*j->v_ps];
418 					}
419 					dest += 16;
420 					source += j->u_rs;
421 				}
422 			} else if (!j->bw && !j->cheap_upsample) {
423 				source = mb_y*8*j->u_rs +
424 					8*mb_x*j->u_ps + u_data;
425 				dest = j->s->block[2];
426 				for (i = 0; i < 8; i++) {
427 					for (k = 0; k < 8; k++)
428 						dest[k] = source[k*j->u_ps];
429 					dest += 8;
430 					source += j->u_rs;
431 				}
432 				source = mb_y*8*j->v_rs +
433 					8*mb_x*j->v_ps + v_data;
434 				dest = j->s->block[3];
435 				for (i = 0; i < 8; i++) {
436 					for (k = 0; k < 8; k++)
437 						dest[k] = source[k*j->v_ps];
438 					dest += 8;
439 					source += j->u_rs;
440 				}
441 			}
442 			emms_c(); /* is this really needed? */
443 
444 			j->s->block_last_index[0] =
445 				j->s->dct_quantize(j->s, j->s->block[0],
446 						0, 8, &overflow);
447 			if (overflow) clip_coeffs(j->s, j->s->block[0],
448 					j->s->block_last_index[0]);
449 			j->s->block_last_index[1] =
450 				j->s->dct_quantize(j->s, j->s->block[1],
451 						1, 8, &overflow);
452 			if (overflow) clip_coeffs(j->s, j->s->block[1],
453 					j->s->block_last_index[1]);
454 
455 			if (!j->bw) {
456 				j->s->block_last_index[4] =
457 					j->s->dct_quantize(j->s, j->s->block[2],
458 							4, 8, &overflow);
459 				if (overflow) clip_coeffs(j->s, j->s->block[2],
460 						j->s->block_last_index[2]);
461 				j->s->block_last_index[5] =
462 					j->s->dct_quantize(j->s, j->s->block[3],
463 							5, 8, &overflow);
464 				if (overflow) clip_coeffs(j->s, j->s->block[3],
465 						j->s->block_last_index[3]);
466 			}
467 			zr_mjpeg_encode_mb(j);
468 		}
469 	}
470 	emms_c();
471 	ff_mjpeg_encode_picture_trailer(j->s);
472 	flush_put_bits(&j->s->pb);
473 
474 	//FIXME
475 	//if (j->s->mjpeg_write_tables == 1)
476 	//	j->s->mjpeg_write_tables = 0;
477 
478 	return put_bits_ptr(&(j->s->pb)) - j->s->pb.buf;
479 }
480 
/**
 * Release an encoder created by jpeg_enc_init().
 *
 * @param j encoder to destroy; NULL is accepted and ignored
 */
void jpeg_enc_uninit(jpeg_enc_t *j) {
	if (j == NULL)
		return;
	ff_mjpeg_encode_close(j->s);
	/* the placeholder avctx was obtained with calloc() in jpeg_enc_init,
	 * so it is released with free() */
	free(j->s->avctx);
	av_free(j->s);
	av_free(j);
}
486