1 /*
2 * Copyright: Björn Ståhl
3 * Description: A12 protocol state machine
4 * License: 3-Clause BSD, see COPYING file in arcan source repository.
5 * Reference: https://arcan-fe.com
6 */
7 #include <arcan_shmif.h>
8 #include <arcan_shmif_server.h>
9
10 #include <inttypes.h>
11 #include <string.h>
12 #include <math.h>
13
14 #include "a12.h"
15 #include "a12_int.h"
16 #include "a12_encode.h"
17
18 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
19 #include "zstd.h"
20
/*
 * Build the control packet that announces a video frame on a channel.
 * All CONTROL_PACKET_SIZE bytes are defined: everything not explicitly
 * packed below is zeroed by the memset.
 *
 * sw/sh describe the full surface, w/h/x/y the (sub-)region this frame
 * covers, len the transfer size and exp_len the uncompressed size.
 */
static void a12int_vframehdr_build(
	uint8_t buf[CONTROL_PACKET_SIZE],
	uint64_t last_seen, uint8_t chid,
	int type, uint32_t sid,
	uint16_t sw, uint16_t sh, uint16_t w, uint16_t h, uint16_t x, uint16_t y,
	uint32_t len, uint32_t exp_len, bool commit, uint8_t flags)
{
	a12int_trace(A12_TRACE_VDETAIL,
		"kind=header:ch=%"PRIu8":type=%d:stream=%"PRIu32
		":sw=%"PRIu16":sh=%"PRIu16":w=%"PRIu16":h=%"PRIu16":x=%"PRIu16
		":y=%"PRIu16":len=%"PRIu32":exp_len=%"PRIu32,
		chid, type, sid, sw, sh, w, h, x, y, len, exp_len
	);

	memset(buf, '\0', CONTROL_PACKET_SIZE);
	pack_u64(last_seen, &buf[0]); /* [0..7]  : last seen sequence number */
	arcan_random(&buf[8], 8);     /* [8..15] : entropy */

	buf[16] = chid; /* [16] : channel-id */
	buf[17] = COMMAND_VIDEOFRAME; /* [17] : command */
	pack_u32(sid, &buf[18]); /* [18..21] : stream-id */
	buf[22] = type; /* [22] : type */
	pack_u16(sw, &buf[23]); /* [23..24] : surfacew */
	pack_u16(sh, &buf[25]); /* [25..26] : surfaceh */
	pack_u16(x, &buf[27]); /* [27..28] : startx */
	pack_u16(y, &buf[29]); /* [29..30] : starty */
	pack_u16(w, &buf[31]); /* [31..32] : framew */
	pack_u16(h, &buf[33]); /* [33..34] : frameh */
	pack_u32(len, &buf[36]); /* [36..39] : length */
	pack_u32(exp_len, &buf[40]); /* [40..43] : exp-length */

	buf[35] = flags; /* [35] : dataflags: uint8 */

/* [44] Commit on completion, this is always set right now but will change
 * when 'chain of deltas' mode for shmif is added */
	buf[44] = commit;
}
61
/*
 * Need to chunk up a binary stream that do not have intermediate headers,
 * which is what the compression / h264 / ... stages produce. To avoid yet
 * another copy, the per-packet header is handed to a12int_append_out via
 * its prepend mechanism.
 */
static void chunk_pack(struct a12_state* S, int type,
	uint8_t chid, uint8_t* buf, size_t buf_sz, size_t chunk_sz)
{
	uint8_t prefix[a12int_header_size(type)];
	prefix[0] = chid;                  /* [0]    : channel id */
	pack_u32(0xbacabaca, &prefix[1]);  /* [1..4] : stream */
	pack_u16(chunk_sz, &prefix[5]);    /* [5..6] : length */

/* emit every full-sized chunk by walking an offset through the buffer */
	size_t ofs = 0;
	while (buf_sz - ofs >= chunk_sz){
		a12int_append_out(S, type, &buf[ofs], chunk_sz, prefix, sizeof(prefix));
		ofs += chunk_sz;
	}

/* then the remainder, if any, with the length field rewritten to match */
	size_t tail = buf_sz - ofs;
	pack_u16(tail, &prefix[5]);
	if (tail)
		a12int_append_out(S, type, &buf[ofs], tail, prefix, sizeof(prefix));
}
86
/*
 * Encode and queue an uncompressed s16 audio frame on [chid].
 *
 * Fixes versus the previous version: the control packet was being built
 * inside the same heap buffer as the repacked samples, so the sample loop
 * (starting at the much smaller audio-packet header offset) overwrote the
 * header bytes at [16..25], and bytes [0..15] (last-seen / entropy) were
 * sent uninitialized. The control packet now lives in its own zeroed
 * buffer, matching a12int_vframehdr_build.
 */
void a12int_encode_araw(struct a12_state* S,
	uint8_t chid,
	shmif_asample* buf,
	uint16_t n_samples,
	struct a12_aframe_cfg cfg,
	struct a12_aframe_opts opts, size_t chunk_sz)
{
/* repack the audio into a temporary buffer for format reasons; sized for
 * the worst case of [n_samples] per channel, the loop below packs exactly
 * n_samples entries -- NOTE(review): confirm whether n_samples counts
 * total interleaved samples or per-channel frames against the callers */
	size_t buf_sz = n_samples * sizeof(uint16_t) * cfg.channels;
	uint8_t* outb = malloc(buf_sz);
	if (!outb){
		a12int_trace(A12_TRACE_ALLOC,
			"failed to alloc %zu for s16aud", buf_sz);
		return;
	}

/* audio control message header, zeroed so no heap garbage goes on the
 * wire, with last-seen and entropy packed like the video counterpart */
	uint8_t hdr[CONTROL_PACKET_SIZE] = {0};
	pack_u64(S->last_seen_seqnr, &hdr[0]); /* [0..7]  : last seen */
	arcan_random(&hdr[8], 8);              /* [8..15] : entropy */
	hdr[16] = chid;                        /* [16]    : channel-id */
	hdr[17] = COMMAND_AUDIOFRAME;          /* [17]    : command */
	pack_u32(0, &hdr[18]);                 /* [18..21]: stream-id */
	hdr[22] = cfg.channels;                /* [22]    : channels */
	hdr[23] = 0;                           /* [23]    : encoding, u16 */
	pack_u16(n_samples, &hdr[24]);         /* [24..25]: n-samples */

/* repack into the right format (note, need _Generic on asample) */
	size_t pos = 0;
	for (size_t i = 0; i < n_samples; i++, pos += 2){
		pack_s16(buf[i], &outb[pos]);
	}

/* announce, then split the payload up (though likely we get fed much
 * smaller chunks) -- chunk_pack prepends its own per-packet header */
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr, CONTROL_PACKET_SIZE, NULL, 0);
	chunk_pack(S, STATE_AUDIO_PACKET, chid, outb, pos, chunk_sz);
	free(outb);
}
124
/*
 * the rgb565, rgb and rgba function all follow the same pattern:
 * announce the frame with a control packet, then sweep the source region
 * and emit fixed-size video packets, with a smaller trailing packet for
 * whatever does not fill a full block.
 */
void a12int_encode_rgb565(PACK_ARGS)
{
	size_t px_sz = 2; /* b5g6r5: two bytes per packed pixel */

/* calculate chunk sizes based on a fitting amount of pixels */
	size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
	size_t ppb = (chunk_sz - hdr_sz) / px_sz; /* pixels per block */
	size_t bpb = ppb * px_sz;                 /* payload bytes per block */
	size_t blocks = w * h / ppb;              /* number of full blocks */

/* pos indexes shmif_pixel entries, so vb->pitch counts pixels not bytes */
	shmif_pixel* inbuf = vb->buffer;
	size_t pos = y * vb->pitch + x;

/* get the packing buffer, cancel if oom */
	uint8_t* outb = malloc(hdr_sz + bpb);
	if (!outb){
		a12int_trace(A12_TRACE_ALLOC,
			"failed to alloc %zu for rgb565", hdr_sz + bpb);
		return;
	}

/* store the control frame that defines our video buffer */
	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		POSTPROCESS_VIDEO_RGB565, sid, vb->w, vb->h, w, h, x, y,
		w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll);
	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(bpb, &outb[5]); /* [5..6] : length */

/* sweep the incoming frame, and pack maximum block size; row_len tracks
 * how far into the current source row we are so pos can skip the pitch-
 * padding at each row end */
	size_t row_len = w;
	for (size_t i = 0; i < blocks; i++){
		for (size_t j = 0; j < bpb; j += px_sz){
			uint8_t r, g, b, ign;
			uint16_t px;
			SHMIF_RGBA_DECOMP(inbuf[pos++], &r, &g, &b, &ign);
			px =
				(((b >> 3) & 0x1f) << 0) |
				(((g >> 2) & 0x3f) << 5) |
				(((r >> 3) & 0x1f) << 11)
			;
			pack_u16(px, &outb[hdr_sz+j]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
	}

/* last chunk: the pixels left over when w*h is not divisible by ppb,
 * re-using the same packing buffer with a rewritten length field */
	size_t left = ((w * h) - (blocks * ppb)) * px_sz;
	if (left){
		pack_u16(left, &outb[5]);
		a12int_trace(A12_TRACE_VDETAIL, "small block of %zu bytes", left);
		for (size_t i = 0; i < left; i+= px_sz){
			uint8_t r, g, b, ign;
			uint16_t px;
			SHMIF_RGBA_DECOMP(inbuf[pos++], &r, &g, &b, &ign);
			px =
				(((b >> 3) & 0x1f) << 0) |
				(((g >> 2) & 0x3f) << 5) |
				(((r >> 3) & 0x1f) << 11)
			;
			pack_u16(px, &outb[hdr_sz+i]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, left+hdr_sz, NULL, 0);
	}

	free(outb);
}
210
/*
 * Raw RGBA encoder: same block-sweep pattern as rgb565/rgb, but the four
 * decomposed channel bytes are written straight into the output packet.
 */
void a12int_encode_rgba(PACK_ARGS)
{
	size_t px_sz = 4; /* r, g, b, a : one byte each */
	a12int_trace(A12_TRACE_VDETAIL, "kind=status:codec=rgba");

/* calculate chunk sizes based on a fitting amount of pixels */
	size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
	size_t ppb = (chunk_sz - hdr_sz) / px_sz; /* pixels per block */
	size_t bpb = ppb * px_sz;                 /* payload bytes per block */
	size_t blocks = w * h / ppb;              /* number of full blocks */

/* pos indexes shmif_pixel entries, so vb->pitch counts pixels not bytes */
	shmif_pixel* inbuf = vb->buffer;
	size_t pos = y * vb->pitch + x;

/* get the packing buffer, cancel if oom */
	uint8_t* outb = malloc(hdr_sz + bpb);
	if (!outb)
		return;

/* store the control frame that defines our video buffer */
	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		POSTPROCESS_VIDEO_RGBA, sid, vb->w, vb->h, w, h, x, y,
		w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll
	);
	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(bpb, &outb[5]); /* [5..6] : length */

/* sweep the incoming frame, and pack maximum block size; row_len tracks
 * the position within the source row so the pitch padding can be skipped */
	size_t row_len = w;
	for (size_t i = 0; i < blocks; i++){
		for (size_t j = 0; j < bpb; j += px_sz){
			uint8_t* dst = &outb[hdr_sz+j];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &dst[3]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}

/* dispatch to out-queue(s) */
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
	}

/* last chunk: remainder when w*h is not divisible by ppb */
	size_t left = ((w * h) - (blocks * ppb)) * px_sz;
	if (left){
		pack_u16(left, &outb[5]);
		a12int_trace(A12_TRACE_VDETAIL,
			"kind=status:message=padblock:size=%zu", left);
		for (size_t i = 0; i < left; i+= px_sz){
			uint8_t* dst = &outb[hdr_sz+i];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &dst[3]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + left, NULL, 0);
	}

	free(outb);
}
281
a12int_encode_rgb(PACK_ARGS)282 void a12int_encode_rgb(PACK_ARGS)
283 {
284 size_t px_sz = 3;
285 a12int_trace(A12_TRACE_VDETAIL, "kind=status:ch=%"PRIu8"codec=rgb", (uint8_t) chid);
286
287 /* calculate chunk sizes based on a fitting amount of pixels */
288 size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
289 size_t ppb = (chunk_sz - hdr_sz) / px_sz;
290 size_t bpb = ppb * px_sz;
291 size_t blocks = w * h / ppb;
292
293 shmif_pixel* inbuf = vb->buffer;
294 size_t pos = y * vb->pitch + x;
295
296 /* get the packing buffer, cancel if oom */
297 uint8_t* outb = malloc(hdr_sz + bpb);
298 if (!outb)
299 return;
300
301 /* store the control frame that defines our video buffer */
302 uint8_t hdr_buf[CONTROL_PACKET_SIZE];
303 a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
304 POSTPROCESS_VIDEO_RGB, sid, vb->w, vb->h, w, h, x, y,
305 w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll
306 );
307 a12int_step_vstream(S, sid);
308 a12int_append_out(S,
309 STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
310
311 outb[0] = chid; /* [0] : channel id */
312 pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
313 pack_u16(bpb, &outb[5]); /* [5..6] : length */
314
315 /* sweep the incoming frame, and pack maximum block size */
316 size_t row_len = w;
317 for (size_t i = 0; i < blocks; i++){
318 for (size_t j = 0; j < bpb; j += px_sz){
319 uint8_t ign;
320 uint8_t* dst = &outb[hdr_sz+j];
321 SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &ign);
322 row_len--;
323 if (row_len == 0){
324 pos += vb->pitch - w;
325 row_len = w;
326 }
327 }
328
329 /* dispatch to out-queue(s) */
330 a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
331 }
332
333 /* pack the last chunk (if w * h % ppb != 0)
334 */
335 size_t bytes_left = ((w * h) - (blocks * ppb)) * px_sz;
336 if (bytes_left){
337 size_t ofs = 0;
338 pack_u16(bytes_left, &outb[5]);
339
340 while (bytes_left - ofs){
341 uint8_t ign;
342 uint8_t* dst = &outb[hdr_sz+ofs];
343 SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &ign);
344 ofs += px_sz;
345
346 row_len--;
347 if (row_len == 0){
348 pos += vb->pitch - w;
349 row_len = w;
350 }
351 }
352
353 a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bytes_left, NULL, 0);
354 }
355
356 free(outb);
357 }
358
359 /* Model indicates which pre-trained model to use, this is currently only
360 * used for TPACK but if there is more domain information to be had, here
361 * is the slot to patch that in. */
setup_zstd(struct a12_state * S,uint8_t ch,int model)362 static bool setup_zstd(struct a12_state* S, uint8_t ch, int model)
363 {
364 if (!S->channels[ch].zstd){
365 S->channels[ch].zstd = ZSTD_createCCtx();
366 if (!S->channels[ch].zstd){
367 return false;
368 }
369 ZSTD_CCtx_setParameter(S->channels[ch].zstd, ZSTD_c_nbWorkers, 4);
370 }
371
372 return true;
373 }
374
/* Result of one (delta-)compression pass, see compress_deltaz. */
struct compress_res {
	bool ok;          /* false on allocation or compressor failure */
	uint8_t type;     /* POSTPROCESS_VIDEO_* for the frame header */
	size_t in_sz;     /* uncompressed payload size in bytes */
	size_t out_sz;    /* compressed payload size in bytes */
	uint8_t* out_buf; /* malloc:ed compressed data, caller frees */
};
382
compress_tzstd(struct a12_state * S,uint8_t ch,struct shmifsrv_vbuffer * vb,uint32_t sid,int w,int h,size_t chunk_sz)383 static void compress_tzstd(struct a12_state* S, uint8_t ch,
384 struct shmifsrv_vbuffer* vb, uint32_t sid, int w, int h, size_t chunk_sz)
385 {
386 if (!setup_zstd(S, ch, SEGID_TUI)){
387 return;
388 }
389 int type = POSTPROCESS_VIDEO_TZSTD;
390
391 /* full header-size: 4 + 2 + 2 + 1 + 2 + 4 + 1 = 16 bytes */
392 /* first 4 bytes is length */
393 uint32_t compress_in_sz;
394 unpack_u32(&compress_in_sz, vb->buffer_bytes);
395
396 /* second 2 bytes is number of lines (line-header size) */
397 uint16_t n_lines;
398 unpack_u16(&n_lines, &vb->buffer_bytes[4]);
399
400 /* third 2 bytes is number of cells */
401 uint16_t n_cells;
402 unpack_u16(&n_cells, &vb->buffer_bytes[6]);
403
404 /* line-header size (2 + 2 + 2 + 3 = 9 bytes), cell size = 12 bytes) */
405 if (compress_in_sz != n_lines * 9 + n_cells * 12 + 16){
406 a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=corrupt TPACK buffer");
407 return;
408 }
409
410 #ifdef DUMP_TRAIN
411 static size_t counter = 0;
412 char tmpnam[16];
413 snprintf(tmpnam, 16, "tp_%zu.raw", counter);
414 FILE* fout = fopen(tmpnam, "w+");
415 fwrite(vb->buffer_bytes, compress_in_sz, 1, fout);
416 fclose(fout);
417 counter++;
418 #endif
419
420 size_t out_sz;
421 uint8_t* buf;
422 out_sz = ZSTD_compressBound(compress_in_sz);
423 buf = malloc(out_sz);
424
425 out_sz = ZSTD_compressCCtx(
426 S->channels[ch].zstd, buf, out_sz, vb->buffer_bytes, compress_in_sz, 1);
427
428 if (ZSTD_isError(out_sz)){
429 a12int_trace(A12_TRACE_ALLOC,
430 "kind=zstd_fail:message=%s", ZSTD_getErrorName(out_sz));
431 free(buf);
432 return;
433 }
434
435 a12int_trace(A12_TRACE_VDETAIL,
436 "kind=status:codec=dzstd:b_in=%zu:b_out=%zu:ratio=%.2f",
437 (size_t)compress_in_sz,
438 (size_t) out_sz, (float)(compress_in_sz+1.0) / (float)(out_sz+1.0)
439 );
440
441 if (!buf){
442 a12int_trace(A12_TRACE_ALLOC, "failed to build compressed TPACK output");
443 return;
444 }
445
446 uint8_t hdr_buf[CONTROL_PACKET_SIZE];
447 a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, ch,
448 type, sid, vb->w, vb->h, w, h, 0, 0,
449 out_sz, compress_in_sz, 1, vb->flags.origo_ll
450 );
451
452 a12int_trace(A12_TRACE_VDETAIL,
453 "kind=status:codec=tpack:b_in=%zu:b_out=%zu",
454 (size_t) compress_in_sz, (size_t) out_sz
455 );
456
457 a12int_step_vstream(S, sid);
458 a12int_append_out(S,
459 STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
460
461 chunk_pack(S, STATE_VIDEO_PACKET, ch, buf, out_sz, chunk_sz);
462 free(buf);
463 }
464
/* TPACK (text surface) entry point: forwards to the zstd TPACK path */
void a12int_encode_ztz(PACK_ARGS)
{
	compress_tzstd(S, chid, vb, sid, w, h, chunk_sz);
}
469
compress_deltaz(struct a12_state * S,uint8_t ch,struct shmifsrv_vbuffer * vb,size_t * x,size_t * y,size_t * w,size_t * h,bool zstd)470 static struct compress_res compress_deltaz(struct a12_state* S, uint8_t ch,
471 struct shmifsrv_vbuffer* vb, size_t* x, size_t* y, size_t* w, size_t* h, bool zstd)
472 {
473 int type;
474 uint8_t* compress_in;
475 size_t compress_in_sz = 0;
476 struct shmifsrv_vbuffer* ab = &S->channels[ch].acc;
477
478 /* reset the accumulation buffer so that we rebuild the normal frame */
479 if (ab->w != vb->w || ab->h != vb->h){
480 a12int_trace(A12_TRACE_VIDEO,
481 "kind=resize:ch=%"PRIu8"prev_w=%zu:rev_h=%zu:new_w%zu:new_h=%zu",
482 ch, (size_t) ab->w, (size_t) ab->h, (size_t) vb->w, (size_t) vb->h
483 );
484 free(ab->buffer);
485 free(S->channels[ch].compression);
486 ab->buffer = NULL;
487 S->channels[ch].compression = NULL;
488 }
489
490 if (!setup_zstd(S, ch, SEGID_APPLICATION)){
491 return (struct compress_res){};
492 }
493
494 /* first, reset or no-delta mode, build accumulation buffer and copy */
495 if (!ab->buffer){
496 type = POSTPROCESS_VIDEO_ZSTD;
497 *ab = *vb;
498 size_t nb = vb->w * vb->h * 3;
499 ab->buffer = malloc(nb);
500 *w = vb->w;
501 *h = vb->h;
502 *x = 0;
503 *y = 0;
504 a12int_trace(A12_TRACE_VIDEO,
505 "kind=status:ch=%"PRIu8"compress=dpng:message=I", ch);
506
507 if (!ab->buffer)
508 return (struct compress_res){};
509
510 /* the compression buffer stores a ^ b, accumulation is a packed copy of the
511 * contents of the previous input frame, this should provide a better basis for
512 * deflates RLE etc. stages, but also act as an option for us to provide our
513 * cheaper RLE or send out a raw- frame when the RLE didn't work out */
514 S->channels[ch].compression = malloc(nb);
515 compress_in_sz = nb;
516
517 if (!S->channels[ch].compression){
518 free(ab->buffer);
519 ab->buffer = NULL;
520 return (struct compress_res){};
521 }
522
523 /* so accumulation buffer might be tightly packed while the source
524 * buffer do not have to be, thus we need to iterate and do this copy */
525 compress_in = (uint8_t*) ab->buffer;
526 uint8_t* acc = compress_in;
527 size_t ofs = 0;
528 for (size_t y = 0; y < vb->h; y++){
529 for (size_t x = 0; x < vb->w; x++){
530 uint8_t ign;
531 shmif_pixel px = vb->buffer[y*vb->pitch+x];
532 SHMIF_RGBA_DECOMP(px, &acc[ofs], &acc[ofs+1], &acc[ofs+2], &ign);
533 ofs += 3;
534 }
535 }
536 }
537 /* We have a delta frame, use accumulation buffer as a way to calculate a ^ b
538 * and store ^ b. For smaller regions, we might want to do something simpler
539 * like RLE only. The flags (,0) can be derived with the _zip helper */
540 else {
541 a12int_trace(A12_TRACE_VDETAIL,
542 "kind=status:ch=%"PRIu8"dw=%zu:dh=%zu:x=%zu:y=%zu",
543 ch, (size_t)*w, (size_t)*h, (size_t) *x, (size_t) *y
544 );
545 compress_in = S->channels[ch].compression;
546 uint8_t* acc = (uint8_t*) ab->buffer;
547 for (size_t cy = (*y); cy < (*y)+(*h); cy++){
548 size_t rs = (cy * ab->w + (*x)) * 3;
549
550 for (size_t cx = *x; cx < (*x)+(*w); cx++){
551 uint8_t r, g, b, ign;
552 shmif_pixel px = vb->buffer[cy * vb->pitch + cx];
553 SHMIF_RGBA_DECOMP(px, &r, &g, &b, &ign);
554 compress_in[compress_in_sz++] = acc[rs+0] ^ r;
555 compress_in[compress_in_sz++] = acc[rs+1] ^ g;
556 compress_in[compress_in_sz++] = acc[rs+2] ^ b;
557 acc[rs+0] = r; acc[rs+1] = g; acc[rs+2] = b;
558 rs += 3;
559 }
560 }
561 type = POSTPROCESS_VIDEO_DZSTD;
562 }
563
564 size_t out_sz;
565 uint8_t* buf;
566
567 out_sz = ZSTD_compressBound(compress_in_sz);
568 buf = malloc(out_sz);
569 if (!buf)
570 return (struct compress_res){};
571
572 out_sz = ZSTD_compressCCtx(
573 S->channels[ch].zstd, buf, out_sz, compress_in, compress_in_sz, 1);
574
575 if (ZSTD_isError(out_sz)){
576 a12int_trace(A12_TRACE_ALLOC,
577 "kind=zstd_fail:message=%s", ZSTD_getErrorName(out_sz));
578 free(buf);
579 return (struct compress_res){};
580 }
581
582 a12int_trace(A12_TRACE_VDETAIL,
583 "kind=status:codec=dzstd:b_in=%zu:b_out=%zu:ratio=%.2f",
584 compress_in_sz, out_sz, (float)(compress_in_sz+1.0) / (float)(out_sz+1.0)
585 );
586
587 return (struct compress_res){
588 .type = type,
589 .ok = buf != NULL,
590 .out_buf = buf,
591 .out_sz = out_sz,
592 .in_sz = compress_in_sz
593 };
594 }
595
a12int_encode_dzstd(PACK_ARGS)596 void a12int_encode_dzstd(PACK_ARGS)
597 {
598 struct compress_res cres = compress_deltaz(S, chid, vb, &x, &y, &w, &h, true);
599 if (!cres.ok)
600 return;
601
602 uint8_t hdr_buf[CONTROL_PACKET_SIZE];
603 a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
604 cres.type, sid, vb->w, vb->h, w, h, x, y,
605 cres.out_sz, cres.in_sz, 1, vb->flags.origo_ll
606 );
607
608 a12int_trace(A12_TRACE_VDETAIL,
609 "kind=status:codec=dzstd:b_in=%zu:b_out=%zu", w * h * 3, cres.out_sz
610 );
611
612 a12int_step_vstream(S, sid);
613 a12int_append_out(S,
614 STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
615 chunk_pack(S, STATE_VIDEO_PACKET, chid, cres.out_buf, cres.out_sz, chunk_sz);
616
617 free(cres.out_buf);
618 }
619
620
a12int_encode_dpng(PACK_ARGS)621 void a12int_encode_dpng(PACK_ARGS)
622 {
623 struct compress_res cres = compress_deltaz(S, chid, vb, &x, &y, &w, &h, false);
624 if (!cres.ok)
625 return;
626
627 uint8_t hdr_buf[CONTROL_PACKET_SIZE];
628 a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
629 cres.type, sid, vb->w, vb->h, w, h, x, y,
630 cres.out_sz, cres.in_sz, 1, vb->flags.origo_ll
631 );
632
633 a12int_trace(A12_TRACE_VDETAIL,
634 "kind=status:codec=dpng:b_in=%zu:b_out=%zu", w * h * 3, cres.out_sz
635 );
636
637 a12int_step_vstream(S, sid);
638 a12int_append_out(S,
639 STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
640 chunk_pack(S, STATE_VIDEO_PACKET, chid, cres.out_buf, cres.out_sz, chunk_sz);
641
642 free(cres.out_buf);
643 }
644
/*
 * Tear down the per-channel encoder state (zstd context and, when built
 * with ffmpeg support, the h264 encoder stage). [failed] marks the channel
 * so the h264 path won't be retried until the dimensions change.
 */
void a12int_encode_drop(struct a12_state* S, int chid, bool failed)
{
	if (S->channels[chid].zstd){
		ZSTD_freeCCtx(S->channels[chid].zstd);
		S->channels[chid].zstd = NULL;
	}

#if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)
	if (!S->channels[chid].videnc.encdec)
		return;

/* dealloc context */
/* NOTE(review): only the pointer is cleared here; the AVCodecContext
 * allocated in open_videnc is never passed to avcodec_free_context, which
 * looks like a leak on every resize/teardown -- confirm whether something
 * else owns it before changing */
	S->channels[chid].videnc.encdec = NULL;
	S->channels[chid].videnc.failed = failed;

	if (S->channels[chid].videnc.scaler){
		sws_freeContext(S->channels[chid].videnc.scaler);
		S->channels[chid].videnc.scaler = NULL;
	}

	if (S->channels[chid].videnc.frame){
		av_frame_free(&S->channels[chid].videnc.frame);
	}

/* free both sets NULL and noops on NULL */
	av_packet_free(&S->channels[chid].videnc.packet);
#endif

/* NOTE(review): this fires even in builds without h264 support and after
 * the zstd-only teardown, so the message can be misleading */
	a12int_trace(A12_TRACE_VIDEO, "dropping h264 context");
}
675
676 #if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)
677
open_videnc(struct a12_state * S,struct a12_vframe_opts venc_opts,struct shmifsrv_vbuffer * vb,int chid,int codecid)678 static bool open_videnc(struct a12_state* S,
679 struct a12_vframe_opts venc_opts,
680 struct shmifsrv_vbuffer* vb, int chid, int codecid)
681 {
682 a12int_trace(A12_TRACE_VIDEO,
683 "kind=codec:status=open:ch=%d:codec=%d", chid, codecid);
684 AVCodec* codec = S->channels[chid].videnc.codec;
685 AVFrame* frame = NULL;
686 AVPacket* packet = NULL;
687 struct SwsContext* scaler = NULL;
688
689 if (!codec){
690 codec = avcodec_find_encoder(codecid);
691 if (!codec)
692 return false;
693 S->channels[chid].videnc.codec = codec;
694 }
695
696 /*
697 * prior to this, we have a safeguard if the input resolution isn't % 2 so
698 * this requirement for ffmpeg holds -- the other option is to pad and crop
699 * as part of the swscale pixfmt conversion.
700 */
701 AVCodecContext* encoder = avcodec_alloc_context3(codec);
702 S->channels[chid].videnc.encdec = encoder;
703 S->channels[chid].videnc.w = vb->w;
704 S->channels[chid].videnc.h = vb->h;
705
706 /* Check opts and switch preset, bitrate, tuning etc. based on resolution
707 * and link estimates. Later we should switch this dynamically, possibly
708 * reconfigure based on AV_CODEC_CAP_PARAM_CHANGE */
709 if (codecid == AV_CODEC_ID_H264){
710 switch(venc_opts.bias){
711 case VFRAME_BIAS_LATENCY:
712 av_opt_set(encoder->priv_data, "preset", "veryfast", 0);
713 av_opt_set(encoder->priv_data, "tune", "zerolatency", 0);
714 a12int_trace(A12_TRACE_VIDEO, "kind=encopt:zerolatency");
715 break;
716
717 /* Many more dynamic heuristics to consider here, doing rolling frame contents
718 * based on segment type and to distinguish GAME based on the complexity
719 * (retro/pixelart vs. 3D) and on the load */
720 case VFRAME_BIAS_BALANCED:
721 av_opt_set(encoder->priv_data, "preset", "medium", 0);
722 av_opt_set(encoder->priv_data, "tune", "film", 0);
723 a12int_trace(A12_TRACE_VIDEO, "kind=encopt:mediumfilm");
724 break;
725
726 case VFRAME_BIAS_QUALITY:
727 av_opt_set(encoder->priv_data, "preset", "slow", 0);
728 av_opt_set(encoder->priv_data, "tune", "film", 0);
729 a12int_trace(A12_TRACE_VIDEO, "kind=encopt:slowfilm");
730 break;
731 }
732 }
733
734 /* should expose a lot more options passable from the transport layer here */
735 if (!venc_opts.ratefactor)
736 venc_opts.ratefactor = 22;
737
738 char buf[8];
739 snprintf(buf, 8, "%d", venc_opts.ratefactor);
740 av_opt_set(encoder->priv_data, "crf", buf, 0);
741
742 /* this caps the ratefactor based on an eval buffer window */
743 if (!venc_opts.bitrate)
744 venc_opts.bitrate = 1000;
745
746 snprintf(buf, 8, "%zu", (size_t) venc_opts.bitrate * 1000);
747 av_opt_set(encoder->priv_data, "maxrate", buf, 0);
748
749 a12int_trace(A12_TRACE_VIDEO,
750 "kind=encval:crf=%d:rate=%zu", venc_opts.ratefactor, venc_opts.bitrate);
751
752 encoder->width = vb->w;
753 encoder->height = vb->h;
754
755 /* uncertain about the level of VFR support, but that's really what we need
756 * and then possibly abuse the PTS field to prebuffer frames in the context
757 * of video playback and so on. */
758 encoder->time_base = (AVRational){1, 25};
759 encoder->framerate = (AVRational){25, 1};
760 encoder->gop_size = 1;
761 encoder->max_b_frames = 1;
762 encoder->pix_fmt = AV_PIX_FMT_YUV420P;
763 if (avcodec_open2(encoder, codec, NULL) < 0)
764 goto fail;
765
766 frame = av_frame_alloc();
767 if (!frame)
768 goto fail;
769
770 packet = av_packet_alloc();
771 if (!packet)
772 goto fail;
773
774 frame->format = AV_PIX_FMT_YUV420P;
775 frame->width = vb->w;
776 frame->height = vb->h;
777 frame->pts = 0;
778
779 if (av_frame_get_buffer(frame, 32) < 0 ||
780 av_frame_make_writable(frame) < 0)
781 goto fail;
782
783 S->channels[chid].videnc.encdec = encoder;
784
785 scaler = sws_getContext(
786 vb->w, vb->h, AV_PIX_FMT_BGRA,
787 vb->w, vb->h, AV_PIX_FMT_YUV420P,
788 SWS_BILINEAR, NULL, NULL, NULL
789 );
790
791 if (!scaler)
792 goto fail;
793
794 S->channels[chid].videnc.scaler = scaler;
795 S->channels[chid].videnc.frame = frame;
796 S->channels[chid].videnc.packet = packet;
797
798 a12int_trace(A12_TRACE_VIDEO, "kind=codec_ok:ch=%d:codec=%d", chid, codecid);
799 return true;
800
801 fail:
802 if (frame)
803 av_frame_free(&frame);
804 if (packet)
805 av_packet_free(&packet);
806 if (scaler)
807 sws_freeContext(scaler);
808 a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=could not setup codec");
809 return false;
810 }
811 #endif
812
a12int_encode_h264(PACK_ARGS)813 void a12int_encode_h264(PACK_ARGS)
814 {
815 /* A major complication here is that there is a requirement for the
816 * source- width and height to be evenly divisible by 2. The option
817 * then is to pad, or the cheap fallback of switching codec. Let us
818 * go with the cheap one for now. */
819 #ifdef WANT_H264_ENC
820 if (vb->w % 2 != 0 || vb->h % 2 != 0){
821 a12int_encode_drop(S, chid, true);
822 }
823
824 /* On resize, rebuild the encoder stage and send new headers etc. */
825 else if (
826 vb->w != S->channels[chid].videnc.w ||
827 vb->h != S->channels[chid].videnc.h)
828 a12int_encode_drop(S, chid, false);
829
830 /* If we don't have an encoder (first time or reset due to resize),
831 * try to configure, and if the configuration fails (i.e. still no
832 * encoder set) fallback to DPNG and only try again on new size. */
833 if (!S->channels[chid].videnc.encdec &&
834 !S->channels[chid].videnc.failed){
835 if (!open_videnc(S, opts, vb, chid, AV_CODEC_ID_H264)){
836 a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=h264 codec failed");
837 a12int_encode_drop(S, chid, true);
838 }
839 else
840 a12int_trace(A12_TRACE_VIDEO, "kind=status:ch=%d:message=set-h264", chid);
841 }
842
843 /* on failure, just fallback and retry alloc on dimensions change */
844 if (S->channels[chid].videnc.failed)
845 goto fallback;
846
847 /* just for shorthand */
848 AVFrame* frame = S->channels[chid].videnc.frame;
849 AVCodecContext* encoder = S->channels[chid].videnc.encdec;
850 AVPacket* packet = S->channels[chid].videnc.packet;
851 struct SwsContext* scaler = S->channels[chid].videnc.scaler;
852
853 /* missing:
854 *
855 * there is associated-data that can be set to the frame which the encoder
856 * can use - a big and interesting one is REGIONS_OF_INTEREST that can be
857 * combined with our dirty-rectangles to help the encoder along.
858 *
859 * that should be something like av_set_side_data() and an
860 * 'adaptive quantization' mode (aq_mode == variance or autovariance)
861 *
862 * would be nice with representative examples first and quantifiers to
863 * assess the effect.
864 *
865 * other useful tuning is marking sbs for vr
866 */
867
868 /* and color-convert from src into frame */
869 int ret;
870 const uint8_t* const src[] = {(uint8_t*)vb->buffer};
871 int src_stride[] = {vb->stride};
872 int rv = sws_scale(scaler,
873 src, src_stride, 0, vb->h, frame->data, frame->linesize);
874 if (rv < 0){
875 a12int_trace(A12_TRACE_VIDEO, "rescaling failed: %d", rv);
876 a12int_encode_drop(S, chid, true);
877 goto fallback;
878 }
879
880 /* send to encoder, may return EAGAIN requesting a flush */
881 again:
882 frame->pts++;
883 ret = avcodec_send_frame(encoder, frame);
884 if (ret < 0 && ret != AVERROR(EAGAIN)){
885 a12int_trace(A12_TRACE_VIDEO, "encoder failed: %d", ret);
886 a12int_encode_drop(S, chid, true);
887 goto fallback;
888 }
889
890 /* flush, 0 is OK, < 0 and not EAGAIN is a real error */
891 int out_ret;
892 do {
893 out_ret = avcodec_receive_packet(encoder, packet);
894 if (out_ret == AVERROR(EAGAIN) || out_ret == AVERROR_EOF)
895 return;
896
897 else if (out_ret < 0){
898 a12int_trace(
899 A12_TRACE_VIDEO, "error getting packet from encoder: %d", rv);
900 a12int_encode_drop(S, chid, true);
901 goto fallback;
902 }
903
904 a12int_trace(A12_TRACE_VDETAIL, "videnc: %5d", packet->size);
905
906 /* don't see a nice way to combine ffmpegs view of 'packets' and ours,
907 * maybe we could avoid it and the extra copy but uncertain */
908 uint8_t hdr_buf[CONTROL_PACKET_SIZE];
909 a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
910 POSTPROCESS_VIDEO_H264, sid, vb->w, vb->h, vb->w, vb->h,
911 0, 0, packet->size, vb->w * vb->h * 4, 1, vb->flags.origo_ll
912 );
913 a12int_step_vstream(S, sid);
914 a12int_append_out(S,
915 STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
916
917 chunk_pack(S, STATE_VIDEO_PACKET, chid, packet->data, packet->size, chunk_sz);
918 av_packet_unref(packet);
919 }
920 while (out_ret >= 0);
921
922 /* frame never got encoded, should work now */
923 if (ret == AVERROR(EAGAIN))
924 goto again;
925
926 return;
927
928 fallback:
929 a12int_encode_dpng(FWD_ARGS);
930 #else
931 a12int_encode_dpng(FWD_ARGS);
932 #endif
933 a12int_trace(A12_TRACE_VIDEO, "switching to fallback (PNG) on videnc fail");
934 }
935