1 /*
2  * Copyright: 2017-2018, Björn Ståhl
3  * Description: A12 protocol state machine, substream decoding routines
4  * License: 3-Clause BSD, see COPYING file in arcan source repository.
5  * Reference: https://arcan-fe.com
6  */
7 #include <arcan_shmif.h>
8 #include <arcan_shmif_server.h>
9 
10 #include <inttypes.h>
11 #include <string.h>
12 #include <math.h>
13 
14 #include "a12.h"
15 #include "a12_int.h"
16 #include "zstd.h"
17 
18 #ifdef LOG_FRAME_OUTPUT
19 #define STB_IMAGE_WRITE_STATIC
20 #define STB_IMAGE_WRITE_IMPLEMENTATION
21 #define STB_IMAGE_IMPLEMENTATION
22 #include "../../engine/external/stb_image_write.h"
23 #endif
24 
drain_video(struct a12_channel * ch,struct video_frame * cvf)25 static void drain_video(struct a12_channel* ch, struct video_frame* cvf)
26 {
27 	cvf->commit = 0;
28 	if (ch->active == CHANNEL_RAW){
29 		a12int_trace(A12_TRACE_VIDEO,
30 			"kind=drain:dest=user:ts=%llu", arcan_timemillis());
31 
32 		if (ch->raw.signal_video){
33 			ch->raw.signal_video(cvf->x, cvf->y,
34 				cvf->x + cvf->w, cvf->y + cvf->h, ch->raw.tag);
35 		}
36 		return;
37 	}
38 
39 	a12int_trace(A12_TRACE_VIDEO,
40 		"kind=drain:dest=%"PRIxPTR":ts=%llu", (uintptr_t) ch->cont, arcan_timemillis());
41 	arcan_shmif_signal(ch->cont, SHMIF_SIGVID);
42 }
43 
a12int_buffer_format(int method)44 bool a12int_buffer_format(int method)
45 {
46 	return
47 		method == POSTPROCESS_VIDEO_H264 ||
48 		method == POSTPROCESS_VIDEO_TZSTD ||
49 		method == POSTPROCESS_VIDEO_ZSTD ||
50 		method == POSTPROCESS_VIDEO_DZSTD;
51 }
52 
/*
 * Drain callback for expanded (decompressed) video data: [buf, len] is one
 * chunk of raw pixel bytes, [user] is the a12 state from which the active
 * channel's frame and destination segment are derived. Returns 1 to keep
 * consuming, 0 to abort the stream.
 *
 * Non-TZSTD data is packed RGB triplets; a chunk boundary can split a pixel,
 * so a 1..2 byte remainder is carried between calls in cvf->pxbuf.
 */
static int video_miniz(const void* buf, int len, void* user)
{
	struct a12_state* S = user;
	struct video_frame* cvf = &S->channels[S->in_channel].unpack_state.vframe;
	struct arcan_shmif_cont* cont = S->channels[S->in_channel].cont;
	const uint8_t* inbuf = buf;

/* no destination segment, or more data than the announced expanded size:
 * refuse rather than overrun the destination */
	if (!cont || len > cvf->expanded_sz){
		a12int_trace(A12_TRACE_SYSTEM, "decompression resulted in data overcommit");
		return 0;
	}

/* we have a 1..4 byte spill from a previous call so we need to have
 * a 1-px buffer that we populate before packing */
	if (cvf->carry){
		while (cvf->carry < 3){
			cvf->pxbuf[cvf->carry++] = *inbuf++;
			len--;

/* and this spill can also be short */
			if (!len)
				return 1;
		}

/* and commit: DZSTD is a delta frame, xor against the pixel already in the
 * destination; otherwise write the carried pixel out directly */
		if (cvf->postprocess == POSTPROCESS_VIDEO_DZSTD){
			uint8_t r, g, b, a;
			SHMIF_RGBA_DECOMP(cont->vidp[cvf->out_pos], &r, &g, &b, &a);

			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				cvf->pxbuf[0] ^ r,
				cvf->pxbuf[1] ^ g,
				cvf->pxbuf[2] ^ b,
				0xff
			);
		}
		else
			cont->vidp[cvf->out_pos++] =
				SHMIF_RGBA(cvf->pxbuf[0], cvf->pxbuf[1], cvf->pxbuf[2], 0xff);

/* which can happen on a row boundary: rewind by width, advance by pitch
 * (pitch can exceed the visible width) */
		cvf->row_left--;
		if (cvf->row_left == 0){
			cvf->out_pos -= cvf->w;
			cvf->out_pos += cont->pitch;
			cvf->row_left = cvf->w;
		}
		cvf->carry = 0;
	}

/* tpack is easier, just write into vidb, ensure that we don't exceed
 * the size from a missed resize_ call and the rest is done consumer side */
	if (cvf->postprocess == POSTPROCESS_VIDEO_TZSTD){
		memcpy(&cont->vidb[cvf->out_pos], inbuf, len);
		cvf->out_pos += len;
		cvf->expanded_sz -= len;
		return 1;
	}

/* pixel-aligned fill/unpack, same as everywhere else */
	size_t npx = (len / 3) * 3;
	for (size_t i = 0; i < npx; i += 3){
		if (cvf->postprocess == POSTPROCESS_VIDEO_DZSTD){
			uint8_t r, g, b, a;
			SHMIF_RGBA_DECOMP(cont->vidp[cvf->out_pos], &r, &g, &b, &a);

			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				inbuf[i+0] ^ r,
				inbuf[i+1] ^ g,
				inbuf[i+2] ^ b,
				0xff
			);
		}
		else{
			cont->vidp[cvf->out_pos++] =
				SHMIF_RGBA(inbuf[i], inbuf[i+1], inbuf[i+2], 0xff);
		}

		cvf->row_left--;
		if (cvf->row_left == 0){
			cvf->out_pos -= cvf->w;
			cvf->out_pos += cont->pitch;
			cvf->row_left = cvf->w;
		}
	}

/* we need to account for len bytes not aligning */
	if (len - npx){
		cvf->carry = 0;
		for (size_t i = 0; i < len - npx; i++){
			cvf->pxbuf[cvf->carry++] = inbuf[npx + i];
		}
	}

/* NOTE(review): bytes consumed while completing a carried pixel above were
 * decremented from len, so they are never subtracted from expanded_sz here.
 * Harmless when the whole frame arrives in a single call (the zstd path),
 * but worth confirming for multi-chunk streaming use. */
	cvf->expanded_sz -= len;
	return 1;
}
150 
151 #ifdef WANT_H264_DEC
152 
ffmpeg_decode_pkt(struct a12_state * S,struct video_frame * cvf,struct arcan_shmif_cont * cont)153 void ffmpeg_decode_pkt(
154 	struct a12_state* S, struct video_frame* cvf, struct arcan_shmif_cont* cont)
155 {
156 	a12int_trace(A12_TRACE_VIDEO,
157 		"ffmpeg:packet_size=%d", cvf->ffmpeg.packet->size);
158 	int ret = avcodec_send_packet(cvf->ffmpeg.context, cvf->ffmpeg.packet);
159 	if (ret < 0){
160 		a12int_trace(A12_TRACE_VIDEO, "ffmpeg:packet_status=decode_fail");
161 		a12_vstream_cancel(S, S->in_channel, STREAM_CANCEL_DECODE_ERROR);
162 		return;
163 	}
164 
165 	while (ret >= 0){
166 		ret = avcodec_receive_frame(cvf->ffmpeg.context, cvf->ffmpeg.frame);
167 		if (ret == AVERROR(EAGAIN) || ret == AVERROR(EOF)){
168 			a12int_trace(A12_TRACE_VIDEO, "ffmpeg:avcodec=again|eof:value=%d", ret);
169 			return;
170 		}
171 		else if (ret != 0){
172 			a12int_trace(A12_TRACE_SYSTEM, "ffmpeg:avcodec=fail:code=%d", ret);
173 			a12_vstream_cancel(S, S->in_channel, STREAM_CANCEL_DECODE_ERROR);
174 			return;
175 		}
176 
177 		a12int_trace(A12_TRACE_VIDEO,
178 			"ffmpeg:kind=convert:commit=%d:format=yub420p", cvf->commit);
179 /* Quite possible that we should actually cache this context as well, but it
180  * has different behavior to the rest due to resize. Since this all turns
181  * ffmpeg into a dependency, maybe it belongs in the vframe setup on resize. */
182 		struct SwsContext* scaler =
183 			sws_getContext(cvf->w, cvf->h, AV_PIX_FMT_YUV420P,
184 				cvf->w, cvf->h, AV_PIX_FMT_BGRA, SWS_BILINEAR, NULL, NULL, NULL);
185 
186 		uint8_t* const dst[] = {cont->vidb};
187 		int dst_stride[] = {cont->stride};
188 
189 		sws_scale(scaler, (const uint8_t* const*) cvf->ffmpeg.frame->data,
190 			cvf->ffmpeg.frame->linesize, 0, cvf->h, dst, dst_stride);
191 
192 /* Mark that we should send a ping so the other side can update the drift wnd */
193 		if (cvf->commit && cvf->commit != 255){
194 			drain_video(&S->channels[S->in_channel], cvf);
195 		}
196 
197 		sws_freeContext(scaler);
198 	}
199 }
200 
ffmpeg_alloc(struct a12_channel * ch,int method)201 static bool ffmpeg_alloc(struct a12_channel* ch, int method)
202 {
203 	bool new_codec = false;
204 
205 	if (!ch->videnc.codec){
206 		ch->videnc.codec = avcodec_find_decoder(method);
207 		if (!ch->videnc.codec){
208 			a12int_trace(A12_TRACE_SYSTEM, "couldn't find h264 decoder");
209 			return false;
210 		}
211 		new_codec = true;
212 	}
213 
214 	if (!ch->videnc.encdec){
215 		ch->videnc.encdec = avcodec_alloc_context3(ch->videnc.codec);
216 		if (!ch->videnc.encdec){
217 			a12int_trace(A12_TRACE_SYSTEM, "couldn't setup h264 codec context");
218 			return false;
219 		}
220 	}
221 
222 /* got the context, but it needs to be 'opened' as well */
223 	if (new_codec){
224 		if (avcodec_open2(ch->videnc.encdec, ch->videnc.codec, NULL ) < 0)
225 			return false;
226 	}
227 
228 	if (!ch->videnc.parser){
229 		ch->videnc.parser = av_parser_init(ch->videnc.codec->id);
230 		if (!ch->videnc.parser){
231 			a12int_trace(A12_TRACE_SYSTEM, "kind=ffmpeg_alloc:status=parser_alloc fail");
232 			return false;
233 		}
234 	}
235 
236 	if (!ch->videnc.frame){
237 		ch->videnc.frame = av_frame_alloc();
238 		if (!ch->videnc.frame){
239 			a12int_trace(A12_TRACE_SYSTEM, "kind=ffmpeg_alloc:status=frame_alloc fail");
240 			return false;
241 		}
242 	}
243 
244 /* packet is their chunking mechanism (research if this step can be avoided) */
245 	if (!ch->videnc.packet){
246 		ch->videnc.packet = av_packet_alloc();
247 		if (!ch->videnc.packet){
248 			return false;
249 		}
250 	}
251 
252 	if (new_codec){
253 		a12int_trace(A12_TRACE_VIDEO, "kind=ffmpeg_alloc:status=new_codec:id=%d", method);
254 	}
255 
256 	return true;
257 }
258 #endif
259 
a12int_decode_drop(struct a12_state * S,int chid,bool failed)260 void a12int_decode_drop(struct a12_state* S, int chid, bool failed)
261 {
262 	if (S->channels[chid].unpack_state.vframe.zstd){
263 		ZSTD_freeDCtx(S->channels[chid].unpack_state.vframe.zstd);
264 		S->channels[chid].zstd = NULL;
265 	}
266 
267 #if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)
268 	if (!S->channels[chid].videnc.encdec)
269 		return;
270 
271 #endif
272 }
273 
a12int_vframe_setup(struct a12_channel * ch,struct video_frame * dst,int method)274 bool a12int_vframe_setup(struct a12_channel* ch, struct video_frame* dst, int method)
275 {
276 	*dst = (struct video_frame){};
277 
278 	if (method == POSTPROCESS_VIDEO_H264){
279 #ifdef WANT_H264_DEC
280 		if (!ffmpeg_alloc(ch, AV_CODEC_ID_H264))
281 			return false;
282 
283 /* parser, context, packet, frame, scaler */
284 		dst->ffmpeg.context = ch->videnc.encdec;
285 		dst->ffmpeg.packet = ch->videnc.packet;
286 		dst->ffmpeg.frame = ch->videnc.frame;
287 		dst->ffmpeg.parser = ch->videnc.parser;
288 		dst->ffmpeg.scaler = ch->videnc.scaler;
289 
290 #else
291 		return false;
292 #endif
293 	}
294 	return true;
295 }
296 
a12int_decode_vbuffer(struct a12_state * S,struct a12_channel * ch,struct video_frame * cvf,struct arcan_shmif_cont * cont)297 void a12int_decode_vbuffer(struct a12_state* S,
298 	struct a12_channel* ch, struct video_frame* cvf, struct arcan_shmif_cont* cont)
299 {
300 	a12int_trace(A12_TRACE_VIDEO, "decode vbuffer, method: %d", cvf->postprocess);
301 	if ( cvf->postprocess == POSTPROCESS_VIDEO_DZSTD
302 		|| cvf->postprocess == POSTPROCESS_VIDEO_ZSTD
303 		|| cvf->postprocess == POSTPROCESS_VIDEO_TZSTD)
304 	{
305 		uint64_t content_sz = ZSTD_getFrameContentSize(cvf->inbuf, cvf->inbuf_pos);
306 
307 /* repeat and compare, don't le/gt */
308 		if (content_sz == cvf->expanded_sz){
309 			if (!ch->unpack_state.vframe.zstd &&
310 				!(ch->unpack_state.vframe.zstd = ZSTD_createDCtx())){
311 				a12int_trace(A12_TRACE_SYSTEM,
312 					"kind=alloc_error:zstd_context_alloc");
313 			}
314 /* actually decompress */
315 			else {
316 				void* buffer = malloc(content_sz);
317 				if (buffer){
318 					uint64_t decode =
319 						ZSTD_decompressDCtx(ch->unpack_state.vframe.zstd,
320 							buffer, content_sz, cvf->inbuf, cvf->inbuf_pos);
321 					a12int_trace(A12_TRACE_VIDEO, "kind=ztd_state:%"PRIu64, decode);
322 					video_miniz(buffer, content_sz, S);
323 					free(buffer);
324 				}
325 			}
326 		}
327 		else {
328 			a12int_trace(A12_TRACE_SYSTEM,
329 				"kind=decode_error:in_sz=%zu:exp_sz=%zu:message=size mismatch",
330 				(size_t) content_sz, (size_t) cvf->expanded_sz
331 			);
332 		}
333 
334 		free(cvf->inbuf);
335 		cvf->inbuf = NULL;
336 		cvf->carry = 0;
337 
338 /* this is a junction where other local transfer strategies should be considered,
339  * i.e. no-block and defer process on the next stepframe or spin on the vready */
340 		if (cvf->commit && cvf->commit != 255){
341 			drain_video(ch, cvf);
342 		}
343 		return;
344 	}
345 #ifdef WANT_H264_DEC
346 	else if (cvf->postprocess == POSTPROCESS_VIDEO_H264){
347 /* just keep it around after first time of use */
348 /* since these are stateful, we need to tie them to the channel dynamically */
349 		a12int_trace(A12_TRACE_VIDEO,
350 			"kind=ffmpeg_state:parser=%"PRIxPTR
351 			":context=%"PRIxPTR
352 			":inbuf_size=%zu",
353 			(uintptr_t)cvf->ffmpeg.parser,
354 			(uintptr_t)cvf->ffmpeg.context,
355 			(size_t)cvf->inbuf_pos
356 		);
357 
358 #define DUMP_COMPRESSED
359 #ifdef DUMP_COMPRESSED
360 		static FILE* outf;
361 		if (!outf)
362 			outf = fopen("raw.h264", "w");
363 		fwrite(cvf->inbuf, cvf->inbuf_pos, 1, outf);
364 #endif
365 
366 /* parser_parse2 can short-read */
367 		ssize_t ofs = 0;
368 		while (cvf->inbuf_pos - ofs > 0){
369 			int ret =
370 				av_parser_parse2(cvf->ffmpeg.parser, cvf->ffmpeg.context,
371 				&cvf->ffmpeg.packet->data, &cvf->ffmpeg.packet->size,
372 				&cvf->inbuf[ofs], cvf->inbuf_pos - ofs, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0
373 			);
374 
375 			if (ret < 0){
376 				a12int_trace(A12_TRACE_VIDEO, "kind=ffmpeg_state:parser=broken:code=%d", ret);
377 				cvf->commit = 255;
378 				goto out_h264;
379 			}
380 			a12int_trace(A12_TRACE_VDETAIL, "kind=parser:return=%d:"
381 				"packet_sz=%d:ofset=%zd", ret, cvf->ffmpeg.packet->size, ofs);
382 
383 			ofs += ret;
384 			if (cvf->ffmpeg.packet->data){
385 				ffmpeg_decode_pkt(S, cvf, cont);
386 			}
387 		}
388 
389 out_h264:
390 		free(cvf->inbuf);
391 		cvf->inbuf = NULL;
392 		cvf->carry = 0;
393 		return;
394 	}
395 #endif
396 
397 	a12int_trace(A12_TRACE_SYSTEM, "unhandled unpack method %d", cvf->postprocess);
398 /* NOTE: should we send something about an undesired frame format here as
399  * well in order to let the source re-send the frame in another format?
400  * that could offset the need to 'negotiate' */
401 }
402 
/*
 * Unpack the current decode buffer (S->decode, S->decode_pos bytes) of an
 * uncompressed video stream into the destination segment, handling the
 * RGBA, RGB and RGB565 raw formats. Acks the stream and signals the segment
 * once all announced bytes (cvf->inbuf_sz) have been consumed.
 */
void a12int_unpack_vbuffer(struct a12_state* S,
	struct video_frame* cvf, struct arcan_shmif_cont* cont)
{
/* raw frame types, the implementations and variations are so small that
 * we can just do it here - no need for the more complex stages like for
 * 264, ... */
	if (cvf->postprocess == POSTPROCESS_VIDEO_RGBA){
		for (size_t i = 0; i < S->decode_pos; i += 4){
			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				S->decode[i+0], S->decode[i+1], S->decode[i+2], S->decode[i+3]);
			cvf->row_left--;
/* row finished: rewind by width, advance by pitch (pitch can exceed width) */
			if (cvf->row_left == 0){
				cvf->out_pos -= cvf->w;
				cvf->out_pos += cont->pitch;
				cvf->row_left = cvf->w;
			}
		}
	}
/* same as RGBA, but alpha is forced to fully opaque */
	else if (cvf->postprocess == POSTPROCESS_VIDEO_RGB){
		for (size_t i = 0; i < S->decode_pos; i += 3){
			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				S->decode[i+0], S->decode[i+1], S->decode[i+2], 0xff);
			cvf->row_left--;
			if (cvf->row_left == 0){
				cvf->out_pos -= cvf->w;
				cvf->out_pos += cont->pitch;
				cvf->row_left = cvf->w;
			}
		}
	}
	else if (cvf->postprocess == POSTPROCESS_VIDEO_RGB565){
/* lookup tables that expand the 5- and 6-bit channels to 8-bit */
		static const uint8_t rgb565_lut5[] = {
			0,     8,  16,  25,  33,  41,  49,  58,  66,   74,  82,  90,  99, 107,
			115, 123, 132, 140, 148, 156, 165, 173, 181, 189,  197, 206, 214, 222,
			230, 239, 247, 255
		};

		static const uint8_t rgb565_lut6[] = {
			0,     4,   8,  12,  16,  20,  24,  28,  32,  36,  40,  45,  49,  53,  57,
			61,   65,  69,  73,  77,  81,  85,  89,  93,  97, 101, 105, 109, 113, 117,
			121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174,
			178, 182, 186, 190, 194, 198, 202, 206, 210, 215, 219, 223, 227, 231,
			235, 239, 243, 247, 251, 255
		};

/* each pixel is two bytes, r: 5 high bits, g: 6 middle, b: 5 low */
		for (size_t i = 0; i < S->decode_pos; i += 2){
			uint16_t px;
			unpack_u16(&px, &S->decode[i]);
			cont->vidp[cvf->out_pos++] =
				SHMIF_RGBA(
					rgb565_lut5[ (px & 0xf800) >> 11],
					rgb565_lut6[ (px & 0x07e0) >>  5],
					rgb565_lut5[ (px & 0x001f)      ],
					0xff
				);
			cvf->row_left--;
			if (cvf->row_left == 0){
				cvf->out_pos -= cvf->w;
				cvf->out_pos += cont->pitch;
				cvf->row_left = cvf->w;
			}
		}
	}

/* the frame is complete when all announced bytes have been unpacked */
	cvf->inbuf_sz -= S->decode_pos;
	if (cvf->inbuf_sz == 0){
		a12int_trace(A12_TRACE_VIDEO,
			"video frame completed, commit:%"PRIu8, cvf->commit);
		a12int_stream_ack(S, S->in_channel, cvf->id);
		if (cvf->commit){
			arcan_shmif_signal(cont, SHMIF_SIGVID);
		}
	}
	else {
		a12int_trace(A12_TRACE_VDETAIL, "video buffer left: %"PRIu32, cvf->inbuf_sz);
	}
}
480