/*
 * Copyright: 2017-2018, Björn Ståhl
 * Description: A12 protocol state machine, substream decoding routines
 * License: 3-Clause BSD, see COPYING file in arcan source repository.
 * Reference: https://arcan-fe.com
 */
7 #include <arcan_shmif.h>
8 #include <arcan_shmif_server.h>
9
10 #include <inttypes.h>
11 #include <string.h>
12 #include <math.h>
13
14 #include "a12.h"
15 #include "a12_int.h"
16 #include "zstd.h"
17
18 #ifdef LOG_FRAME_OUTPUT
19 #define STB_IMAGE_WRITE_STATIC
20 #define STB_IMAGE_WRITE_IMPLEMENTATION
21 #define STB_IMAGE_IMPLEMENTATION
22 #include "../../engine/external/stb_image_write.h"
23 #endif
24
drain_video(struct a12_channel * ch,struct video_frame * cvf)25 static void drain_video(struct a12_channel* ch, struct video_frame* cvf)
26 {
27 cvf->commit = 0;
28 if (ch->active == CHANNEL_RAW){
29 a12int_trace(A12_TRACE_VIDEO,
30 "kind=drain:dest=user:ts=%llu", arcan_timemillis());
31
32 if (ch->raw.signal_video){
33 ch->raw.signal_video(cvf->x, cvf->y,
34 cvf->x + cvf->w, cvf->y + cvf->h, ch->raw.tag);
35 }
36 return;
37 }
38
39 a12int_trace(A12_TRACE_VIDEO,
40 "kind=drain:dest=%"PRIxPTR":ts=%llu", (uintptr_t) ch->cont, arcan_timemillis());
41 arcan_shmif_signal(ch->cont, SHMIF_SIGVID);
42 }
43
a12int_buffer_format(int method)44 bool a12int_buffer_format(int method)
45 {
46 return
47 method == POSTPROCESS_VIDEO_H264 ||
48 method == POSTPROCESS_VIDEO_TZSTD ||
49 method == POSTPROCESS_VIDEO_ZSTD ||
50 method == POSTPROCESS_VIDEO_DZSTD;
51 }
52
/*
 * Decompression sink: consume [len] bytes of inflated video data in [buf]
 * and unpack into the video buffer of the active channel (S->in_channel).
 * Covers the zstd-family postprocess modes:
 *  - ZSTD  : raw R8G8B8 pixels
 *  - DZSTD : R8G8B8 delta, xor:ed against the current front-buffer contents
 *  - TZSTD : tpack cells, copied through as-is for the consumer to unpack
 * Returns 1 to keep consuming, 0 to abort (no segment / size overcommit).
 */
static int video_miniz(const void* buf, int len, void* user)
{
	struct a12_state* S = user;
	struct video_frame* cvf = &S->channels[S->in_channel].unpack_state.vframe;
	struct arcan_shmif_cont* cont = S->channels[S->in_channel].cont;
	const uint8_t* inbuf = buf;

/* no destination segment mapped, or the sender supplied more data than the
 * announced expanded size - treat both as protocol errors and abort */
	if (!cont || len > cvf->expanded_sz){
		a12int_trace(A12_TRACE_SYSTEM, "decompression resulted in data overcommit");
		return 0;
	}

/* we have a 1..4 byte spill from a previous call so we need to have
 * a 1-px buffer that we populate before packing */
	if (cvf->carry){
		while (cvf->carry < 3){
			cvf->pxbuf[cvf->carry++] = *inbuf++;
			len--;

/* and this spill can also be short */
			if (!len)
				return 1;
		}

/* and commit */
		if (cvf->postprocess == POSTPROCESS_VIDEO_DZSTD){
			uint8_t r, g, b, a;
			SHMIF_RGBA_DECOMP(cont->vidp[cvf->out_pos], &r, &g, &b, &a);

/* delta-frame: new channel values are xor:ed against the previous frame
 * pixel, alpha forced opaque */
			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				cvf->pxbuf[0] ^ r,
				cvf->pxbuf[1] ^ g,
				cvf->pxbuf[2] ^ b,
				0xff
			);
		}
		else
			cont->vidp[cvf->out_pos++] =
				SHMIF_RGBA(cvf->pxbuf[0], cvf->pxbuf[1], cvf->pxbuf[2], 0xff);

/* which can happen on a row boundary */
		cvf->row_left--;
		if (cvf->row_left == 0){
			cvf->out_pos -= cvf->w;
			cvf->out_pos += cont->pitch;
			cvf->row_left = cvf->w;
		}
		cvf->carry = 0;
	}

/* tpack is easier, just write into vidb, ensure that we don't exceed
 * the size from a missed resize_ call and the rest is done consumer side */
	if (cvf->postprocess == POSTPROCESS_VIDEO_TZSTD){
		memcpy(&cont->vidb[cvf->out_pos], inbuf, len);
		cvf->out_pos += len;
		cvf->expanded_sz -= len;
		return 1;
	}

/* pixel-aligned fill/unpack, same as everywhere else */
	size_t npx = (len / 3) * 3;
	for (size_t i = 0; i < npx; i += 3){
		if (cvf->postprocess == POSTPROCESS_VIDEO_DZSTD){
			uint8_t r, g, b, a;
			SHMIF_RGBA_DECOMP(cont->vidp[cvf->out_pos], &r, &g, &b, &a);

			cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
				inbuf[i+0] ^ r,
				inbuf[i+1] ^ g,
				inbuf[i+2] ^ b,
				0xff
			);
		}
		else{
			cont->vidp[cvf->out_pos++] =
				SHMIF_RGBA(inbuf[i], inbuf[i+1], inbuf[i+2], 0xff);
		}

/* wrap to the next output row, compensating for pitch != width */
		cvf->row_left--;
		if (cvf->row_left == 0){
			cvf->out_pos -= cvf->w;
			cvf->out_pos += cont->pitch;
			cvf->row_left = cvf->w;
		}
	}

/* we need to account for len bytes not aligning */
	if (len - npx){
		cvf->carry = 0;
		for (size_t i = 0; i < len - npx; i++){
			cvf->pxbuf[cvf->carry++] = inbuf[npx + i];
		}
	}

	cvf->expanded_sz -= len;
	return 1;
}
150
151 #ifdef WANT_H264_DEC
152
ffmpeg_decode_pkt(struct a12_state * S,struct video_frame * cvf,struct arcan_shmif_cont * cont)153 void ffmpeg_decode_pkt(
154 struct a12_state* S, struct video_frame* cvf, struct arcan_shmif_cont* cont)
155 {
156 a12int_trace(A12_TRACE_VIDEO,
157 "ffmpeg:packet_size=%d", cvf->ffmpeg.packet->size);
158 int ret = avcodec_send_packet(cvf->ffmpeg.context, cvf->ffmpeg.packet);
159 if (ret < 0){
160 a12int_trace(A12_TRACE_VIDEO, "ffmpeg:packet_status=decode_fail");
161 a12_vstream_cancel(S, S->in_channel, STREAM_CANCEL_DECODE_ERROR);
162 return;
163 }
164
165 while (ret >= 0){
166 ret = avcodec_receive_frame(cvf->ffmpeg.context, cvf->ffmpeg.frame);
167 if (ret == AVERROR(EAGAIN) || ret == AVERROR(EOF)){
168 a12int_trace(A12_TRACE_VIDEO, "ffmpeg:avcodec=again|eof:value=%d", ret);
169 return;
170 }
171 else if (ret != 0){
172 a12int_trace(A12_TRACE_SYSTEM, "ffmpeg:avcodec=fail:code=%d", ret);
173 a12_vstream_cancel(S, S->in_channel, STREAM_CANCEL_DECODE_ERROR);
174 return;
175 }
176
177 a12int_trace(A12_TRACE_VIDEO,
178 "ffmpeg:kind=convert:commit=%d:format=yub420p", cvf->commit);
179 /* Quite possible that we should actually cache this context as well, but it
180 * has different behavior to the rest due to resize. Since this all turns
181 * ffmpeg into a dependency, maybe it belongs in the vframe setup on resize. */
182 struct SwsContext* scaler =
183 sws_getContext(cvf->w, cvf->h, AV_PIX_FMT_YUV420P,
184 cvf->w, cvf->h, AV_PIX_FMT_BGRA, SWS_BILINEAR, NULL, NULL, NULL);
185
186 uint8_t* const dst[] = {cont->vidb};
187 int dst_stride[] = {cont->stride};
188
189 sws_scale(scaler, (const uint8_t* const*) cvf->ffmpeg.frame->data,
190 cvf->ffmpeg.frame->linesize, 0, cvf->h, dst, dst_stride);
191
192 /* Mark that we should send a ping so the other side can update the drift wnd */
193 if (cvf->commit && cvf->commit != 255){
194 drain_video(&S->channels[S->in_channel], cvf);
195 }
196
197 sws_freeContext(scaler);
198 }
199 }
200
ffmpeg_alloc(struct a12_channel * ch,int method)201 static bool ffmpeg_alloc(struct a12_channel* ch, int method)
202 {
203 bool new_codec = false;
204
205 if (!ch->videnc.codec){
206 ch->videnc.codec = avcodec_find_decoder(method);
207 if (!ch->videnc.codec){
208 a12int_trace(A12_TRACE_SYSTEM, "couldn't find h264 decoder");
209 return false;
210 }
211 new_codec = true;
212 }
213
214 if (!ch->videnc.encdec){
215 ch->videnc.encdec = avcodec_alloc_context3(ch->videnc.codec);
216 if (!ch->videnc.encdec){
217 a12int_trace(A12_TRACE_SYSTEM, "couldn't setup h264 codec context");
218 return false;
219 }
220 }
221
222 /* got the context, but it needs to be 'opened' as well */
223 if (new_codec){
224 if (avcodec_open2(ch->videnc.encdec, ch->videnc.codec, NULL ) < 0)
225 return false;
226 }
227
228 if (!ch->videnc.parser){
229 ch->videnc.parser = av_parser_init(ch->videnc.codec->id);
230 if (!ch->videnc.parser){
231 a12int_trace(A12_TRACE_SYSTEM, "kind=ffmpeg_alloc:status=parser_alloc fail");
232 return false;
233 }
234 }
235
236 if (!ch->videnc.frame){
237 ch->videnc.frame = av_frame_alloc();
238 if (!ch->videnc.frame){
239 a12int_trace(A12_TRACE_SYSTEM, "kind=ffmpeg_alloc:status=frame_alloc fail");
240 return false;
241 }
242 }
243
244 /* packet is their chunking mechanism (research if this step can be avoided) */
245 if (!ch->videnc.packet){
246 ch->videnc.packet = av_packet_alloc();
247 if (!ch->videnc.packet){
248 return false;
249 }
250 }
251
252 if (new_codec){
253 a12int_trace(A12_TRACE_VIDEO, "kind=ffmpeg_alloc:status=new_codec:id=%d", method);
254 }
255
256 return true;
257 }
258 #endif
259
a12int_decode_drop(struct a12_state * S,int chid,bool failed)260 void a12int_decode_drop(struct a12_state* S, int chid, bool failed)
261 {
262 if (S->channels[chid].unpack_state.vframe.zstd){
263 ZSTD_freeDCtx(S->channels[chid].unpack_state.vframe.zstd);
264 S->channels[chid].zstd = NULL;
265 }
266
267 #if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)
268 if (!S->channels[chid].videnc.encdec)
269 return;
270
271 #endif
272 }
273
a12int_vframe_setup(struct a12_channel * ch,struct video_frame * dst,int method)274 bool a12int_vframe_setup(struct a12_channel* ch, struct video_frame* dst, int method)
275 {
276 *dst = (struct video_frame){};
277
278 if (method == POSTPROCESS_VIDEO_H264){
279 #ifdef WANT_H264_DEC
280 if (!ffmpeg_alloc(ch, AV_CODEC_ID_H264))
281 return false;
282
283 /* parser, context, packet, frame, scaler */
284 dst->ffmpeg.context = ch->videnc.encdec;
285 dst->ffmpeg.packet = ch->videnc.packet;
286 dst->ffmpeg.frame = ch->videnc.frame;
287 dst->ffmpeg.parser = ch->videnc.parser;
288 dst->ffmpeg.scaler = ch->videnc.scaler;
289
290 #else
291 return false;
292 #endif
293 }
294 return true;
295 }
296
a12int_decode_vbuffer(struct a12_state * S,struct a12_channel * ch,struct video_frame * cvf,struct arcan_shmif_cont * cont)297 void a12int_decode_vbuffer(struct a12_state* S,
298 struct a12_channel* ch, struct video_frame* cvf, struct arcan_shmif_cont* cont)
299 {
300 a12int_trace(A12_TRACE_VIDEO, "decode vbuffer, method: %d", cvf->postprocess);
301 if ( cvf->postprocess == POSTPROCESS_VIDEO_DZSTD
302 || cvf->postprocess == POSTPROCESS_VIDEO_ZSTD
303 || cvf->postprocess == POSTPROCESS_VIDEO_TZSTD)
304 {
305 uint64_t content_sz = ZSTD_getFrameContentSize(cvf->inbuf, cvf->inbuf_pos);
306
307 /* repeat and compare, don't le/gt */
308 if (content_sz == cvf->expanded_sz){
309 if (!ch->unpack_state.vframe.zstd &&
310 !(ch->unpack_state.vframe.zstd = ZSTD_createDCtx())){
311 a12int_trace(A12_TRACE_SYSTEM,
312 "kind=alloc_error:zstd_context_alloc");
313 }
314 /* actually decompress */
315 else {
316 void* buffer = malloc(content_sz);
317 if (buffer){
318 uint64_t decode =
319 ZSTD_decompressDCtx(ch->unpack_state.vframe.zstd,
320 buffer, content_sz, cvf->inbuf, cvf->inbuf_pos);
321 a12int_trace(A12_TRACE_VIDEO, "kind=ztd_state:%"PRIu64, decode);
322 video_miniz(buffer, content_sz, S);
323 free(buffer);
324 }
325 }
326 }
327 else {
328 a12int_trace(A12_TRACE_SYSTEM,
329 "kind=decode_error:in_sz=%zu:exp_sz=%zu:message=size mismatch",
330 (size_t) content_sz, (size_t) cvf->expanded_sz
331 );
332 }
333
334 free(cvf->inbuf);
335 cvf->inbuf = NULL;
336 cvf->carry = 0;
337
338 /* this is a junction where other local transfer strategies should be considered,
339 * i.e. no-block and defer process on the next stepframe or spin on the vready */
340 if (cvf->commit && cvf->commit != 255){
341 drain_video(ch, cvf);
342 }
343 return;
344 }
345 #ifdef WANT_H264_DEC
346 else if (cvf->postprocess == POSTPROCESS_VIDEO_H264){
347 /* just keep it around after first time of use */
348 /* since these are stateful, we need to tie them to the channel dynamically */
349 a12int_trace(A12_TRACE_VIDEO,
350 "kind=ffmpeg_state:parser=%"PRIxPTR
351 ":context=%"PRIxPTR
352 ":inbuf_size=%zu",
353 (uintptr_t)cvf->ffmpeg.parser,
354 (uintptr_t)cvf->ffmpeg.context,
355 (size_t)cvf->inbuf_pos
356 );
357
358 #define DUMP_COMPRESSED
359 #ifdef DUMP_COMPRESSED
360 static FILE* outf;
361 if (!outf)
362 outf = fopen("raw.h264", "w");
363 fwrite(cvf->inbuf, cvf->inbuf_pos, 1, outf);
364 #endif
365
366 /* parser_parse2 can short-read */
367 ssize_t ofs = 0;
368 while (cvf->inbuf_pos - ofs > 0){
369 int ret =
370 av_parser_parse2(cvf->ffmpeg.parser, cvf->ffmpeg.context,
371 &cvf->ffmpeg.packet->data, &cvf->ffmpeg.packet->size,
372 &cvf->inbuf[ofs], cvf->inbuf_pos - ofs, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0
373 );
374
375 if (ret < 0){
376 a12int_trace(A12_TRACE_VIDEO, "kind=ffmpeg_state:parser=broken:code=%d", ret);
377 cvf->commit = 255;
378 goto out_h264;
379 }
380 a12int_trace(A12_TRACE_VDETAIL, "kind=parser:return=%d:"
381 "packet_sz=%d:ofset=%zd", ret, cvf->ffmpeg.packet->size, ofs);
382
383 ofs += ret;
384 if (cvf->ffmpeg.packet->data){
385 ffmpeg_decode_pkt(S, cvf, cont);
386 }
387 }
388
389 out_h264:
390 free(cvf->inbuf);
391 cvf->inbuf = NULL;
392 cvf->carry = 0;
393 return;
394 }
395 #endif
396
397 a12int_trace(A12_TRACE_SYSTEM, "unhandled unpack method %d", cvf->postprocess);
398 /* NOTE: should we send something about an undesired frame format here as
399 * well in order to let the source re-send the frame in another format?
400 * that could offset the need to 'negotiate' */
401 }
402
a12int_unpack_vbuffer(struct a12_state * S,struct video_frame * cvf,struct arcan_shmif_cont * cont)403 void a12int_unpack_vbuffer(struct a12_state* S,
404 struct video_frame* cvf, struct arcan_shmif_cont* cont)
405 {
406 /* raw frame types, the implementations and variations are so small that
407 * we can just do it here - no need for the more complex stages like for
408 * 264, ... */
409 if (cvf->postprocess == POSTPROCESS_VIDEO_RGBA){
410 for (size_t i = 0; i < S->decode_pos; i += 4){
411 cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
412 S->decode[i+0], S->decode[i+1], S->decode[i+2], S->decode[i+3]);
413 cvf->row_left--;
414 if (cvf->row_left == 0){
415 cvf->out_pos -= cvf->w;
416 cvf->out_pos += cont->pitch;
417 cvf->row_left = cvf->w;
418 }
419 }
420 }
421 else if (cvf->postprocess == POSTPROCESS_VIDEO_RGB){
422 for (size_t i = 0; i < S->decode_pos; i += 3){
423 cont->vidp[cvf->out_pos++] = SHMIF_RGBA(
424 S->decode[i+0], S->decode[i+1], S->decode[i+2], 0xff);
425 cvf->row_left--;
426 if (cvf->row_left == 0){
427 cvf->out_pos -= cvf->w;
428 cvf->out_pos += cont->pitch;
429 cvf->row_left = cvf->w;
430 }
431 }
432 }
433 else if (cvf->postprocess == POSTPROCESS_VIDEO_RGB565){
434 static const uint8_t rgb565_lut5[] = {
435 0, 8, 16, 25, 33, 41, 49, 58, 66, 74, 82, 90, 99, 107,
436 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 197, 206, 214, 222,
437 230, 239, 247, 255
438 };
439
440 static const uint8_t rgb565_lut6[] = {
441 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 45, 49, 53, 57,
442 61, 65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117,
443 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174,
444 178, 182, 186, 190, 194, 198, 202, 206, 210, 215, 219, 223, 227, 231,
445 235, 239, 243, 247, 251, 255
446 };
447
448 for (size_t i = 0; i < S->decode_pos; i += 2){
449 uint16_t px;
450 unpack_u16(&px, &S->decode[i]);
451 cont->vidp[cvf->out_pos++] =
452 SHMIF_RGBA(
453 rgb565_lut5[ (px & 0xf800) >> 11],
454 rgb565_lut6[ (px & 0x07e0) >> 5],
455 rgb565_lut5[ (px & 0x001f) ],
456 0xff
457 );
458 cvf->row_left--;
459 if (cvf->row_left == 0){
460 cvf->out_pos -= cvf->w;
461 cvf->out_pos += cont->pitch;
462 cvf->row_left = cvf->w;
463 }
464 }
465 }
466
467 cvf->inbuf_sz -= S->decode_pos;
468 if (cvf->inbuf_sz == 0){
469 a12int_trace(A12_TRACE_VIDEO,
470 "video frame completed, commit:%"PRIu8, cvf->commit);
471 a12int_stream_ack(S, S->in_channel, cvf->id);
472 if (cvf->commit){
473 arcan_shmif_signal(cont, SHMIF_SIGVID);
474 }
475 }
476 else {
477 a12int_trace(A12_TRACE_VDETAIL, "video buffer left: %"PRIu32, cvf->inbuf_sz);
478 }
479 }
480