/*
 * Copyright: Björn Ståhl
 * Description: A12 protocol state machine
 * License: 3-Clause BSD, see COPYING file in arcan source repository.
 * Reference: https://arcan-fe.com
 */
#include <arcan_shmif.h>
#include <arcan_shmif_server.h>

#include <inttypes.h>
#include <string.h>
#include <math.h>

#include "a12.h"
#include "a12_int.h"
#include "a12_encode.h"

#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

/*
 * build the control packet that defines a video frame
 */
static void a12int_vframehdr_build(
	uint8_t buf[CONTROL_PACKET_SIZE],
	uint64_t last_seen, uint8_t chid,
	int type, uint32_t sid,
	uint16_t sw, uint16_t sh, uint16_t w, uint16_t h, uint16_t x, uint16_t y,
	uint32_t len, uint32_t exp_len, bool commit, uint8_t flags)
{
	a12int_trace(A12_TRACE_VDETAIL,
		"kind=header:ch=%"PRIu8":type=%d:stream=%"PRIu32
		":sw=%"PRIu16":sh=%"PRIu16":w=%"PRIu16":h=%"PRIu16":x=%"PRIu16
		":y=%"PRIu16":len=%"PRIu32":exp_len=%"PRIu32,
		chid, type, sid, sw, sh, w, h, x, y, len, exp_len
	);

	memset(buf, '\0', CONTROL_PACKET_SIZE);
	pack_u64(last_seen, &buf[0]);
	arcan_random(&buf[8], 8); /* [8..15] : entropy */

	buf[16] = chid; /* [16] : channel-id */
	buf[17] = COMMAND_VIDEOFRAME; /* [17] : command */
	pack_u32(sid, &buf[18]); /* [18..21] : stream-id */
	buf[22] = type; /* [22] : type */
	pack_u16(sw, &buf[23]); /* [23..24] : surfacew */
	pack_u16(sh, &buf[25]); /* [25..26] : surfaceh */
	pack_u16(x, &buf[27]); /* [27..28] : startx */
	pack_u16(y, &buf[29]); /* [29..30] : starty */
	pack_u16(w, &buf[31]); /* [31..32] : framew */
	pack_u16(h, &buf[33]); /* [33..34] : frameh */
	pack_u32(len, &buf[36]); /* [36..39] : length */
	pack_u32(exp_len, &buf[40]); /* [40..43] : exp-length */

	buf[35] = flags; /* [35] : dataflags: uint8 */

/* [44] Commit on completion, this is always set right now but will change
 * when 'chain of deltas' mode for shmif is added */
	buf[44] = commit;
}
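
/*
 * Layout summary of the command region packed above (derived from the pack_*
 * calls, kept here for quick reference when reading the decoder side):
 * [0..7] last-seen sequence, [8..15] entropy, [16] channel-id,
 * [17] command (COMMAND_VIDEOFRAME), [18..21] stream-id, [22] type,
 * [23..24] surface-w, [25..26] surface-h, [27..28] start-x, [29..30] start-y,
 * [31..32] frame-w, [33..34] frame-h, [35] flags, [36..39] length,
 * [40..43] expanded length, [44] commit; the rest of CONTROL_PACKET_SIZE
 * stays zeroed by the memset.
 */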

/*
 * Need to chunk up a binary stream that does not have intermediate headers,
 * which is typically the case for the compression / h264 / ... output. To
 * avoid yet another copy, we use the prepend mechanism in a12int_append_out.
 */
static void chunk_pack(struct a12_state* S, int type,
	uint8_t chid, uint8_t* buf, size_t buf_sz, size_t chunk_sz)
{
	size_t n_chunks = buf_sz / chunk_sz;

	uint8_t outb[a12int_header_size(type)];
	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(chunk_sz, &outb[5]); /* [5..6] : length */

	for (size_t i = 0; i < n_chunks; i++){
		a12int_append_out(S, type, &buf[i * chunk_sz], chunk_sz, outb, sizeof(outb));
	}

	size_t left = buf_sz - n_chunks * chunk_sz;
	pack_u16(left, &outb[5]); /* [5..6] : length */
	if (left)
		a12int_append_out(S, type, &buf[n_chunks * chunk_sz], left, outb, sizeof(outb));
}
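
/*
 * Worked example of the chunking above: with buf_sz = 100000 and
 * chunk_sz = 32768, three full 32768-byte packets are queued, followed by a
 * 1696-byte tail packet; all of them carry the same prepended header with the
 * placeholder stream-id.
 */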

void a12int_encode_araw(struct a12_state* S,
	uint8_t chid,
	shmif_asample* buf,
	uint16_t n_samples,
	struct a12_aframe_cfg cfg,
	struct a12_aframe_opts opts, size_t chunk_sz)
{
/* repack the audio into a temporary buffer for format reasons */
	size_t hdr_sz = a12int_header_size(STATE_AUDIO_PACKET);
	size_t buf_sz = hdr_sz + n_samples * sizeof(uint16_t) * cfg.channels;
	uint8_t* outb = malloc(hdr_sz + buf_sz);
	if (!outb){
		a12int_trace(A12_TRACE_ALLOC,
			"failed to alloc %zu for s16aud", buf_sz);
		return;
	}

/* audio control message header */
	outb[16] = chid;
	outb[17] = COMMAND_AUDIOFRAME;
	pack_u32(0, &outb[18]); /* stream-id */
	outb[22] = cfg.channels; /* channels */
	outb[23] = 0; /* encoding, u16 */
	pack_u16(n_samples, &outb[24]);

/* repack into the right format (note, need _Generic on asample) */
	size_t pos = hdr_sz;
	for (size_t i = 0; i < n_samples; i++, pos += 2){
		pack_s16(buf[i], &outb[pos]);
	}

/* then split it up (though likely we get fed much smaller chunks) */
	a12int_append_out(S,
		STATE_CONTROL_PACKET, outb, CONTROL_PACKET_SIZE, NULL, 0);
	chunk_pack(S, STATE_AUDIO_PACKET, chid, &outb[hdr_sz], pos - hdr_sz, chunk_sz);
	free(outb);
}

/*
 * the rgb565, rgb and rgba functions all follow the same pattern
 */
void a12int_encode_rgb565(PACK_ARGS)
{
	size_t px_sz = 2;

/* calculate chunk sizes based on a fitting amount of pixels */
	size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
	size_t ppb = (chunk_sz - hdr_sz) / px_sz;
	size_t bpb = ppb * px_sz;
	size_t blocks = w * h / ppb;

	shmif_pixel* inbuf = vb->buffer;
	size_t pos = y * vb->pitch + x;

/* get the packing buffer, cancel if oom */
	uint8_t* outb = malloc(hdr_sz + bpb);
	if (!outb){
		a12int_trace(A12_TRACE_ALLOC,
			"failed to alloc %zu for rgb565", hdr_sz + bpb);
		return;
	}

/* store the control frame that defines our video buffer */
	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		POSTPROCESS_VIDEO_RGB565, sid, vb->w, vb->h, w, h, x, y,
		w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll);
	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(bpb, &outb[5]); /* [5..6] : length */

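/*
 * Each input pixel is decomposed and truncated to RGB565: 5 bits blue in
 * [4..0], 6 bits green in [10..5], 5 bits red in [15..11]. As a worked
 * example, (r, g, b) = (0xff, 0x80, 0x10) packs to
 * (0x10 >> 3) | ((0x80 >> 2) << 5) | ((0xff >> 3) << 11) = 0xfc02.
 */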
/* sweep the incoming frame, and pack maximum block size */
	size_t row_len = w;
	for (size_t i = 0; i < blocks; i++){
		for (size_t j = 0; j < bpb; j += px_sz){
			uint8_t r, g, b, ign;
			uint16_t px;
			SHMIF_RGBA_DECOMP(inbuf[pos++], &r, &g, &b, &ign);
			px =
				(((b >> 3) & 0x1f) << 0) |
				(((g >> 2) & 0x3f) << 5) |
				(((r >> 3) & 0x1f) << 11)
			;
			pack_u16(px, &outb[hdr_sz+j]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
	}

/* last chunk */
	size_t left = ((w * h) - (blocks * ppb)) * px_sz;
	if (left){
		pack_u16(left, &outb[5]);
		a12int_trace(A12_TRACE_VDETAIL, "small block of %zu bytes", left);
		for (size_t i = 0; i < left; i+= px_sz){
			uint8_t r, g, b, ign;
			uint16_t px;
			SHMIF_RGBA_DECOMP(inbuf[pos++], &r, &g, &b, &ign);
			px =
				(((b >> 3) & 0x1f) << 0) |
				(((g >> 2) & 0x3f) << 5) |
				(((r >> 3) & 0x1f) << 11)
			;
			pack_u16(px, &outb[hdr_sz+i]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, left+hdr_sz, NULL, 0);
	}

	free(outb);
}

void a12int_encode_rgba(PACK_ARGS)
{
	size_t px_sz = 4;
	a12int_trace(A12_TRACE_VDETAIL, "kind=status:codec=rgba");

/* calculate chunk sizes based on a fitting amount of pixels */
	size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
	size_t ppb = (chunk_sz - hdr_sz) / px_sz;
	size_t bpb = ppb * px_sz;
	size_t blocks = w * h / ppb;

	shmif_pixel* inbuf = vb->buffer;
	size_t pos = y * vb->pitch + x;

/* get the packing buffer, cancel if oom */
	uint8_t* outb = malloc(hdr_sz + bpb);
	if (!outb)
		return;

/* store the control frame that defines our video buffer */
	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		POSTPROCESS_VIDEO_RGBA, sid, vb->w, vb->h, w, h, x, y,
		w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll
	);
	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(bpb, &outb[5]); /* [5..6] : length */

/* sweep the incoming frame, and pack maximum block size */
	size_t row_len = w;
	for (size_t i = 0; i < blocks; i++){
		for (size_t j = 0; j < bpb; j += px_sz){
			uint8_t* dst = &outb[hdr_sz+j];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &dst[3]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}

/* dispatch to out-queue(s) */
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
	}

/* last chunk */
	size_t left = ((w * h) - (blocks * ppb)) * px_sz;
	if (left){
		pack_u16(left, &outb[5]);
		a12int_trace(A12_TRACE_VDETAIL,
			"kind=status:message=padblock:size=%zu", left);
		for (size_t i = 0; i < left; i+= px_sz){
			uint8_t* dst = &outb[hdr_sz+i];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &dst[3]);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + left, NULL, 0);
	}

	free(outb);
}

void a12int_encode_rgb(PACK_ARGS)
{
	size_t px_sz = 3;
	a12int_trace(A12_TRACE_VDETAIL, "kind=status:ch=%"PRIu8":codec=rgb", (uint8_t) chid);

/* calculate chunk sizes based on a fitting amount of pixels */
	size_t hdr_sz = a12int_header_size(STATE_VIDEO_PACKET);
	size_t ppb = (chunk_sz - hdr_sz) / px_sz;
	size_t bpb = ppb * px_sz;
	size_t blocks = w * h / ppb;

	shmif_pixel* inbuf = vb->buffer;
	size_t pos = y * vb->pitch + x;

/* get the packing buffer, cancel if oom */
	uint8_t* outb = malloc(hdr_sz + bpb);
	if (!outb)
		return;

/* store the control frame that defines our video buffer */
	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		POSTPROCESS_VIDEO_RGB, sid, vb->w, vb->h, w, h, x, y,
		w * h * px_sz, w * h * px_sz, 1, vb->flags.origo_ll
	);
	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	outb[0] = chid; /* [0] : channel id */
	pack_u32(0xbacabaca, &outb[1]); /* [1..4] : stream */
	pack_u16(bpb, &outb[5]); /* [5..6] : length */

/* sweep the incoming frame, and pack maximum block size */
	size_t row_len = w;
	for (size_t i = 0; i < blocks; i++){
		for (size_t j = 0; j < bpb; j += px_sz){
			uint8_t ign;
			uint8_t* dst = &outb[hdr_sz+j];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &ign);
			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}

/* dispatch to out-queue(s) */
		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bpb, NULL, 0);
	}

/* pack the last chunk (if w * h % ppb != 0) */
	size_t bytes_left = ((w * h) - (blocks * ppb)) * px_sz;
	if (bytes_left){
		size_t ofs = 0;
		pack_u16(bytes_left, &outb[5]);

		while (bytes_left - ofs){
			uint8_t ign;
			uint8_t* dst = &outb[hdr_sz+ofs];
			SHMIF_RGBA_DECOMP(inbuf[pos++], &dst[0], &dst[1], &dst[2], &ign);
			ofs += px_sz;

			row_len--;
			if (row_len == 0){
				pos += vb->pitch - w;
				row_len = w;
			}
		}

		a12int_append_out(S, STATE_VIDEO_PACKET, outb, hdr_sz + bytes_left, NULL, 0);
	}

	free(outb);
}

/* Model indicates which pre-trained model to use; this is currently only
 * used for TPACK, but if there is more domain information to be had, this
 * is the slot to patch it in. */
static bool setup_zstd(struct a12_state* S, uint8_t ch, int model)
{
	if (!S->channels[ch].zstd){
		S->channels[ch].zstd = ZSTD_createCCtx();
		if (!S->channels[ch].zstd){
			return false;
		}
		ZSTD_CCtx_setParameter(S->channels[ch].zstd, ZSTD_c_nbWorkers, 4);
	}

	return true;
}

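/*
 * Note on setup_zstd above: 'model' is currently unused. If pre-trained
 * dictionaries ever get wired in per model, the rough hook (an untested
 * sketch, assuming a dictionary blob dict_buf/dict_sz is available from
 * somewhere) would be a call to
 * ZSTD_CCtx_loadDictionary(S->channels[ch].zstd, dict_buf, dict_sz)
 * right after context creation.
 */
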
struct compress_res {
	bool ok;
	uint8_t type;
	size_t in_sz;
	size_t out_sz;
	uint8_t* out_buf;
};
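
/*
 * Returned by compress_deltaz below: when .ok is set, .out_buf holds .out_sz
 * bytes of compressed payload (from .in_sz raw input bytes) to be sent as a
 * video frame of .type, and the caller is responsible for freeing .out_buf.
 */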

static void compress_tzstd(struct a12_state* S, uint8_t ch,
	struct shmifsrv_vbuffer* vb, uint32_t sid, int w, int h, size_t chunk_sz)
{
	if (!setup_zstd(S, ch, SEGID_TUI)){
		return;
	}
	int type = POSTPROCESS_VIDEO_TZSTD;

/* full header-size: 4 + 2 + 2 + 1 + 2 + 4 + 1 = 16 bytes */
/* first 4 bytes is length */
	uint32_t compress_in_sz;
	unpack_u32(&compress_in_sz, vb->buffer_bytes);

/* second 2 bytes is number of lines (line-header size) */
	uint16_t n_lines;
	unpack_u16(&n_lines, &vb->buffer_bytes[4]);

/* third 2 bytes is number of cells */
	uint16_t n_cells;
	unpack_u16(&n_cells, &vb->buffer_bytes[6]);

/* line-header size (2 + 2 + 2 + 3 = 9 bytes), cell size (12 bytes) */
	if (compress_in_sz != n_lines * 9 + n_cells * 12 + 16){
		a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=corrupt TPACK buffer");
		return;
	}
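
/* As an illustration of the check above: with n_lines = 25 and
 * n_cells = 2000 (roughly a fully populated 80x25 tui surface), the
 * expected buffer size is 16 + 25 * 9 + 2000 * 12 = 24241 bytes. */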

#ifdef DUMP_TRAIN
	static size_t counter = 0;
	char tmpnam[16];
	snprintf(tmpnam, 16, "tp_%zu.raw", counter);
	FILE* fout = fopen(tmpnam, "w+");
	fwrite(vb->buffer_bytes, compress_in_sz, 1, fout);
	fclose(fout);
	counter++;
#endif

	size_t out_sz;
	uint8_t* buf;
	out_sz = ZSTD_compressBound(compress_in_sz);
	buf = malloc(out_sz);
	if (!buf){
		a12int_trace(A12_TRACE_ALLOC, "failed to build compressed TPACK output");
		return;
	}

	out_sz = ZSTD_compressCCtx(
		S->channels[ch].zstd, buf, out_sz, vb->buffer_bytes, compress_in_sz, 1);

	if (ZSTD_isError(out_sz)){
		a12int_trace(A12_TRACE_ALLOC,
			"kind=zstd_fail:message=%s", ZSTD_getErrorName(out_sz));
		free(buf);
		return;
	}

	a12int_trace(A12_TRACE_VDETAIL,
		"kind=status:codec=tzstd:b_in=%zu:b_out=%zu:ratio=%.2f",
		(size_t)compress_in_sz,
		(size_t) out_sz, (float)(compress_in_sz+1.0) / (float)(out_sz+1.0)
	);

	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, ch,
		type, sid, vb->w, vb->h, w, h, 0, 0,
		out_sz, compress_in_sz, 1, vb->flags.origo_ll
	);

	a12int_trace(A12_TRACE_VDETAIL,
		"kind=status:codec=tpack:b_in=%zu:b_out=%zu",
		(size_t) compress_in_sz, (size_t) out_sz
	);

	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

	chunk_pack(S, STATE_VIDEO_PACKET, ch, buf, out_sz, chunk_sz);
	free(buf);
}

void a12int_encode_ztz(PACK_ARGS)
{
	compress_tzstd(S, chid, vb, sid, w, h, chunk_sz);
}

static struct compress_res compress_deltaz(struct a12_state* S, uint8_t ch,
	struct shmifsrv_vbuffer* vb, size_t* x, size_t* y, size_t* w, size_t* h, bool zstd)
{
	int type;
	uint8_t* compress_in;
	size_t compress_in_sz = 0;
	struct shmifsrv_vbuffer* ab = &S->channels[ch].acc;

/* reset the accumulation buffer so that we rebuild the normal frame */
	if (ab->w != vb->w || ab->h != vb->h){
		a12int_trace(A12_TRACE_VIDEO,
			"kind=resize:ch=%"PRIu8":prev_w=%zu:prev_h=%zu:new_w=%zu:new_h=%zu",
			ch, (size_t) ab->w, (size_t) ab->h, (size_t) vb->w, (size_t) vb->h
		);
		free(ab->buffer);
		free(S->channels[ch].compression);
		ab->buffer = NULL;
		S->channels[ch].compression = NULL;
	}

	if (!setup_zstd(S, ch, SEGID_APPLICATION)){
		return (struct compress_res){};
	}

/* first, reset or no-delta mode, build accumulation buffer and copy */
	if (!ab->buffer){
		type = POSTPROCESS_VIDEO_ZSTD;
		*ab = *vb;
		size_t nb = vb->w * vb->h * 3;
		ab->buffer = malloc(nb);
		*w = vb->w;
		*h = vb->h;
		*x = 0;
		*y = 0;
		a12int_trace(A12_TRACE_VIDEO,
			"kind=status:ch=%"PRIu8":compress=dpng:message=I", ch);

		if (!ab->buffer)
			return (struct compress_res){};

/* the compression buffer stores a ^ b, the accumulation buffer is a packed
 * copy of the contents of the previous input frame. This should provide a
 * better basis for the RLE etc. stages of the compressor, but also acts as an
 * option for us to provide our cheaper RLE or send out a raw frame when the
 * RLE didn't work out */
		S->channels[ch].compression = malloc(nb);
		compress_in_sz = nb;

		if (!S->channels[ch].compression){
			free(ab->buffer);
			ab->buffer = NULL;
			return (struct compress_res){};
		}

/* the accumulation buffer is tightly packed while the source buffer does not
 * have to be, thus we need to iterate and do this copy */
		compress_in = (uint8_t*) ab->buffer;
		uint8_t* acc = compress_in;
		size_t ofs = 0;
		for (size_t y = 0; y < vb->h; y++){
			for (size_t x = 0; x < vb->w; x++){
				uint8_t ign;
				shmif_pixel px = vb->buffer[y*vb->pitch+x];
				SHMIF_RGBA_DECOMP(px, &acc[ofs], &acc[ofs+1], &acc[ofs+2], &ign);
				ofs += 3;
			}
		}
	}
/* We have a delta frame, use the accumulation buffer to calculate a ^ b,
 * send that and store b. For smaller regions, we might want to do something
 * simpler like RLE only. The flags (,0) can be derived with the _zip helper */
	else {
		a12int_trace(A12_TRACE_VDETAIL,
			"kind=status:ch=%"PRIu8":dw=%zu:dh=%zu:x=%zu:y=%zu",
			ch, (size_t)*w, (size_t)*h, (size_t) *x, (size_t) *y
		);
		compress_in = S->channels[ch].compression;
		uint8_t* acc = (uint8_t*) ab->buffer;
		for (size_t cy = (*y); cy < (*y)+(*h); cy++){
			size_t rs = (cy * ab->w + (*x)) * 3;

			for (size_t cx = *x; cx < (*x)+(*w); cx++){
				uint8_t r, g, b, ign;
				shmif_pixel px = vb->buffer[cy * vb->pitch + cx];
				SHMIF_RGBA_DECOMP(px, &r, &g, &b, &ign);
				compress_in[compress_in_sz++] = acc[rs+0] ^ r;
				compress_in[compress_in_sz++] = acc[rs+1] ^ g;
				compress_in[compress_in_sz++] = acc[rs+2] ^ b;
				acc[rs+0] = r; acc[rs+1] = g; acc[rs+2] = b;
				rs += 3;
			}
		}
		type = POSTPROCESS_VIDEO_DZSTD;
	}
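
/* To make the delta scheme above concrete: if a channel byte was 0x80 in the
 * previous frame and is 0x82 now, 0x02 goes into compress_in and the
 * accumulation slot is updated to 0x82; unchanged regions therefore collapse
 * into long runs of zero bytes, which zstd compresses very well. */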

	size_t out_sz;
	uint8_t* buf;

	out_sz = ZSTD_compressBound(compress_in_sz);
	buf = malloc(out_sz);
	if (!buf)
		return (struct compress_res){};

	out_sz = ZSTD_compressCCtx(
		S->channels[ch].zstd, buf, out_sz, compress_in, compress_in_sz, 1);

	if (ZSTD_isError(out_sz)){
		a12int_trace(A12_TRACE_ALLOC,
			"kind=zstd_fail:message=%s", ZSTD_getErrorName(out_sz));
		free(buf);
		return (struct compress_res){};
	}

	a12int_trace(A12_TRACE_VDETAIL,
		"kind=status:codec=dzstd:b_in=%zu:b_out=%zu:ratio=%.2f",
		compress_in_sz, out_sz, (float)(compress_in_sz+1.0) / (float)(out_sz+1.0)
	);

	return (struct compress_res){
		.type = type,
		.ok = buf != NULL,
		.out_buf = buf,
		.out_sz = out_sz,
		.in_sz = compress_in_sz
	};
}

void a12int_encode_dzstd(PACK_ARGS)
{
	struct compress_res cres = compress_deltaz(S, chid, vb, &x, &y, &w, &h, true);
	if (!cres.ok)
		return;

	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		cres.type, sid, vb->w, vb->h, w, h, x, y,
		cres.out_sz, cres.in_sz, 1, vb->flags.origo_ll
	);

	a12int_trace(A12_TRACE_VDETAIL,
		"kind=status:codec=dzstd:b_in=%zu:b_out=%zu", w * h * 3, cres.out_sz
	);

	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
	chunk_pack(S, STATE_VIDEO_PACKET, chid, cres.out_buf, cres.out_sz, chunk_sz);

	free(cres.out_buf);
}

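/*
 * Note: the 'dpng' entry point below is kept for historical reasons; as the
 * code stands, compress_deltaz ignores its zstd argument and both dzstd and
 * dpng produce ZSTD/DZSTD post-processed frames.
 */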
void a12int_encode_dpng(PACK_ARGS)
{
	struct compress_res cres = compress_deltaz(S, chid, vb, &x, &y, &w, &h, false);
	if (!cres.ok)
		return;

	uint8_t hdr_buf[CONTROL_PACKET_SIZE];
	a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
		cres.type, sid, vb->w, vb->h, w, h, x, y,
		cres.out_sz, cres.in_sz, 1, vb->flags.origo_ll
	);

	a12int_trace(A12_TRACE_VDETAIL,
		"kind=status:codec=dpng:b_in=%zu:b_out=%zu", w * h * 3, cres.out_sz
	);

	a12int_step_vstream(S, sid);
	a12int_append_out(S,
		STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);
	chunk_pack(S, STATE_VIDEO_PACKET, chid, cres.out_buf, cres.out_sz, chunk_sz);

	free(cres.out_buf);
}

void a12int_encode_drop(struct a12_state* S, int chid, bool failed)
{
	if (S->channels[chid].zstd){
		ZSTD_freeCCtx(S->channels[chid].zstd);
		S->channels[chid].zstd = NULL;
	}

#if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)
	if (!S->channels[chid].videnc.encdec)
		return;

/* dealloc context */
	S->channels[chid].videnc.encdec = NULL;
	S->channels[chid].videnc.failed = failed;

	if (S->channels[chid].videnc.scaler){
		sws_freeContext(S->channels[chid].videnc.scaler);
		S->channels[chid].videnc.scaler = NULL;
	}

	if (S->channels[chid].videnc.frame){
		av_frame_free(&S->channels[chid].videnc.frame);
	}

/* av_packet_free both sets the pointer to NULL and no-ops on NULL */
	av_packet_free(&S->channels[chid].videnc.packet);
#endif

	a12int_trace(A12_TRACE_VIDEO, "dropping h264 context");
}

#if defined(WANT_H264_ENC) || defined(WANT_H264_DEC)

static bool open_videnc(struct a12_state* S,
	struct a12_vframe_opts venc_opts,
	struct shmifsrv_vbuffer* vb, int chid, int codecid)
{
	a12int_trace(A12_TRACE_VIDEO,
		"kind=codec:status=open:ch=%d:codec=%d", chid, codecid);
	AVCodec* codec = S->channels[chid].videnc.codec;
	AVFrame* frame = NULL;
	AVPacket* packet = NULL;
	struct SwsContext* scaler = NULL;

	if (!codec){
		codec = avcodec_find_encoder(codecid);
		if (!codec)
			return false;
		S->channels[chid].videnc.codec = codec;
	}

/*
 * prior to this, we have a safeguard for input resolutions that are not
 * evenly divisible by 2, so this ffmpeg requirement holds -- the other
 * option is to pad and crop as part of the swscale pixfmt conversion.
 */
	AVCodecContext* encoder = avcodec_alloc_context3(codec);
	S->channels[chid].videnc.encdec = encoder;
	S->channels[chid].videnc.w = vb->w;
	S->channels[chid].videnc.h = vb->h;

/* Check opts and switch preset, bitrate, tuning etc. based on resolution
 * and link estimates. Later we should switch this dynamically, possibly
 * reconfigure based on AV_CODEC_CAP_PARAM_CHANGE */
	if (codecid == AV_CODEC_ID_H264){
		switch(venc_opts.bias){
		case VFRAME_BIAS_LATENCY:
			av_opt_set(encoder->priv_data, "preset", "veryfast", 0);
			av_opt_set(encoder->priv_data, "tune", "zerolatency", 0);
			a12int_trace(A12_TRACE_VIDEO, "kind=encopt:zerolatency");
		break;

/* Many more dynamic heuristics to consider here: rolling analysis of frame
 * contents based on segment type, distinguishing GAME by complexity
 * (retro/pixelart vs. 3D) and by load */
		case VFRAME_BIAS_BALANCED:
			av_opt_set(encoder->priv_data, "preset", "medium", 0);
			av_opt_set(encoder->priv_data, "tune", "film", 0);
			a12int_trace(A12_TRACE_VIDEO, "kind=encopt:mediumfilm");
		break;

		case VFRAME_BIAS_QUALITY:
			av_opt_set(encoder->priv_data, "preset", "slow", 0);
			av_opt_set(encoder->priv_data, "tune", "film", 0);
			a12int_trace(A12_TRACE_VIDEO, "kind=encopt:slowfilm");
		break;
		}
	}

/* should expose a lot more options passable from the transport layer here */
	if (!venc_opts.ratefactor)
		venc_opts.ratefactor = 22;

	char buf[16];
	snprintf(buf, sizeof(buf), "%d", venc_opts.ratefactor);
	av_opt_set(encoder->priv_data, "crf", buf, 0);

/* this caps the ratefactor based on an eval buffer window */
	if (!venc_opts.bitrate)
		venc_opts.bitrate = 1000;

	snprintf(buf, sizeof(buf), "%zu", (size_t) venc_opts.bitrate * 1000);
	av_opt_set(encoder->priv_data, "maxrate", buf, 0);

	a12int_trace(A12_TRACE_VIDEO,
		"kind=encval:crf=%d:rate=%zu", venc_opts.ratefactor, venc_opts.bitrate);

	encoder->width = vb->w;
	encoder->height = vb->h;

/* uncertain about the level of VFR support, but that's really what we need;
 * then possibly abuse the PTS field to prebuffer frames in the context of
 * video playback and so on. */
	encoder->time_base = (AVRational){1, 25};
	encoder->framerate = (AVRational){25, 1};
	encoder->gop_size = 1;
	encoder->max_b_frames = 1;
	encoder->pix_fmt = AV_PIX_FMT_YUV420P;
	if (avcodec_open2(encoder, codec, NULL) < 0)
		goto fail;

	frame = av_frame_alloc();
	if (!frame)
		goto fail;

	packet = av_packet_alloc();
	if (!packet)
		goto fail;

	frame->format = AV_PIX_FMT_YUV420P;
	frame->width = vb->w;
	frame->height = vb->h;
	frame->pts = 0;

	if (av_frame_get_buffer(frame, 32) < 0 ||
		av_frame_make_writable(frame) < 0)
		goto fail;

	S->channels[chid].videnc.encdec = encoder;

	scaler = sws_getContext(
		vb->w, vb->h, AV_PIX_FMT_BGRA,
		vb->w, vb->h, AV_PIX_FMT_YUV420P,
		SWS_BILINEAR, NULL, NULL, NULL
	);
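
/* note: input and output dimensions are identical here, the scaler is used
 * purely for the BGRA to YUV420P pixel-format conversion */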

	if (!scaler)
		goto fail;

	S->channels[chid].videnc.scaler = scaler;
	S->channels[chid].videnc.frame = frame;
	S->channels[chid].videnc.packet = packet;

	a12int_trace(A12_TRACE_VIDEO, "kind=codec_ok:ch=%d:codec=%d", chid, codecid);
	return true;

fail:
	if (frame)
		av_frame_free(&frame);
	if (packet)
		av_packet_free(&packet);
	if (scaler)
		sws_freeContext(scaler);
	a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=could not setup codec");
	return false;
}
#endif

void a12int_encode_h264(PACK_ARGS)
{
/* A major complication here is that the source width and height are required
 * to be evenly divisible by 2. The options are to pad, or to take the cheap
 * fallback of switching codec. Let us go with the cheap one for now. */
#ifdef WANT_H264_ENC
	if (vb->w % 2 != 0 || vb->h % 2 != 0){
		a12int_encode_drop(S, chid, true);
	}

/* On resize, rebuild the encoder stage and send new headers etc. */
	else if (
		vb->w != S->channels[chid].videnc.w ||
		vb->h != S->channels[chid].videnc.h)
		a12int_encode_drop(S, chid, false);

/* If we don't have an encoder (first time or reset due to resize),
 * try to configure, and if the configuration fails (i.e. still no
 * encoder set) fall back to DPNG and only try again on a new size. */
	if (!S->channels[chid].videnc.encdec &&
			!S->channels[chid].videnc.failed){
		if (!open_videnc(S, opts, vb, chid, AV_CODEC_ID_H264)){
			a12int_trace(A12_TRACE_SYSTEM, "kind=error:message=h264 codec failed");
			a12int_encode_drop(S, chid, true);
		}
		else
			a12int_trace(A12_TRACE_VIDEO, "kind=status:ch=%d:message=set-h264", chid);
	}

/* on failure, just fall back and retry the alloc when the dimensions change */
	if (S->channels[chid].videnc.failed)
		goto fallback;

/* just for shorthand */
	AVFrame* frame = S->channels[chid].videnc.frame;
	AVCodecContext* encoder = S->channels[chid].videnc.encdec;
	AVPacket* packet = S->channels[chid].videnc.packet;
	struct SwsContext* scaler = S->channels[chid].videnc.scaler;

/* missing:
 *
 * there is associated data that can be attached to the frame which the
 * encoder can use - a big and interesting one is REGIONS_OF_INTEREST, which
 * can be combined with our dirty rectangles to help the encoder along.
 *
 * that should be something like av_frame_new_side_data() together with an
 * 'adaptive quantization' mode (aq_mode == variance or autovariance)
 *
 * it would be nice to have representative examples first, and quantifiers
 * to assess the effect.
 *
 * other useful tuning is marking sbs for vr
 */
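
/* A rough, untested sketch of what the ROI hint mentioned above could look
 * like, assuming an FFmpeg build with AV_FRAME_DATA_REGIONS_OF_INTEREST
 * support and that (x, y, w, h) describe the dirty region:
 *
 *   AVFrameSideData* sd = av_frame_new_side_data(frame,
 *       AV_FRAME_DATA_REGIONS_OF_INTEREST, sizeof(AVRegionOfInterest));
 *   if (sd){
 *       AVRegionOfInterest* roi = (AVRegionOfInterest*) sd->data;
 *       *roi = (AVRegionOfInterest){
 *           .self_size = sizeof(AVRegionOfInterest),
 *           .top = y, .bottom = y + h, .left = x, .right = x + w,
 *           .qoffset = (AVRational){-1, 2} // negative => spend more bits here
 *       };
 *   }
 */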

/* and color-convert from src into frame */
	int ret;
	const uint8_t* const src[] = {(uint8_t*)vb->buffer};
	int src_stride[] = {vb->stride};
	int rv = sws_scale(scaler,
		src, src_stride, 0, vb->h, frame->data, frame->linesize);
	if (rv < 0){
		a12int_trace(A12_TRACE_VIDEO, "rescaling failed: %d", rv);
		a12int_encode_drop(S, chid, true);
		goto fallback;
	}

/* send to encoder, may return EAGAIN requesting a flush */
again:
	frame->pts++;
	ret = avcodec_send_frame(encoder, frame);
	if (ret < 0 && ret != AVERROR(EAGAIN)){
		a12int_trace(A12_TRACE_VIDEO, "encoder failed: %d", ret);
		a12int_encode_drop(S, chid, true);
		goto fallback;
	}

/* flush, 0 is OK, < 0 and not EAGAIN is a real error */
	int out_ret;
	do {
		out_ret = avcodec_receive_packet(encoder, packet);
		if (out_ret == AVERROR(EAGAIN) || out_ret == AVERROR_EOF)
			return;

		else if (out_ret < 0){
			a12int_trace(
				A12_TRACE_VIDEO, "error getting packet from encoder: %d", out_ret);
			a12int_encode_drop(S, chid, true);
			goto fallback;
		}

		a12int_trace(A12_TRACE_VDETAIL, "videnc: %5d", packet->size);

/* don't see a nice way to combine ffmpeg's view of 'packets' and ours,
 * maybe we could avoid it and the extra copy, but uncertain */
		uint8_t hdr_buf[CONTROL_PACKET_SIZE];
		a12int_vframehdr_build(hdr_buf, S->last_seen_seqnr, chid,
			POSTPROCESS_VIDEO_H264, sid, vb->w, vb->h, vb->w, vb->h,
			0, 0, packet->size, vb->w * vb->h * 4, 1, vb->flags.origo_ll
		);
		a12int_step_vstream(S, sid);
		a12int_append_out(S,
			STATE_CONTROL_PACKET, hdr_buf, CONTROL_PACKET_SIZE, NULL, 0);

		chunk_pack(S, STATE_VIDEO_PACKET, chid, packet->data, packet->size, chunk_sz);
		av_packet_unref(packet);
	}
	while (out_ret >= 0);

/* frame never got encoded, should work now */
	if (ret == AVERROR(EAGAIN))
		goto again;

	return;

fallback:
	a12int_encode_dpng(FWD_ARGS);
#else
	a12int_encode_dpng(FWD_ARGS);
#endif
	a12int_trace(A12_TRACE_VIDEO, "switching to fallback (PNG) on videnc fail");
}