/*
 * Copyright © 2019 Manuel Stoeckl
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "shadow.h"
#include "util.h"

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

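/** A compression mode, the inclusive range of levels to benchmark for it,
 * and a short name used when printing results. */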
struct compression_range {
	enum compression_mode mode;
	int min_val;
	int max_val;
	const char *desc;
};

static const struct compression_range comp_ranges[] = {
		{COMP_NONE, 0, 0, "none"},
#ifdef HAS_LZ4
		{COMP_LZ4, -10, 16, "lz4"},
#endif
#ifdef HAS_ZSTD
		{COMP_ZSTD, -10, 22, "zstd"},
#endif
};

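/** Fill a buffer with long, mostly uniform runs of a few byte values,
 * roughly imitating rendered text. */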
static void *create_text_like_image(size_t size)
{
	uint8_t *data = malloc(size);
	if (!data) {
		return NULL;
	}
	for (size_t i = 0; i < size; i++) {
		size_t step = i / 203 - i / 501;
		bool s = step % 2 == 0;
		data[i] = (uint8_t)(s ? ((step >> 1) & 0x2) + 0xfe : 0x00);
	}
	//	int f = open("1.rgb", O_RDONLY);
	//	read(f, data, size);
	//	close(f);
	return data;
}
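/** Fill a buffer with a smoothly varying sequence plus random fine-grained
 * noise, roughly imitating photographic or video content. */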
static void *create_video_like_image(size_t size)
{
	uint8_t *data = malloc(size);
	if (!data) {
		return NULL;
	}
	for (size_t i = 0; i < size; i++) {
		/* Primary sequence with runs, but avoiding obvious
		 * repetition; then add fine grain noise, a main source of
		 * complexity in real images */
		uint32_t noise = (uint32_t)rand() % 2;
		data[i] = (uint8_t)(i + i / 101 + i / 33 + noise);
	}
	//	int f = open("0.rgb", O_RDONLY);
	//	read(f, data, size);
	//	close(f);
	return data;
}
/** Create a shuffled variation of the original image. */
static void perturb(void *data, size_t size)
{
	uint8_t *bytes = (uint8_t *)data;
	for (int i = 0; i < 50; i++) {
		// TODO: avoid redundant motion, and make this very fast
		size_t low = (size_t)rand() % size;
		size_t high = (size_t)rand() % size;
		if (low >= high) {
			continue;
		}
		for (size_t k = 0; k < (high - low) / 2; k++) {
			uint8_t tmp = bytes[low + k];
			bytes[low + k] = bytes[high - k];
			bytes[high - k] = tmp;
		}
	}
}

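/** Result of benchmarking one (mode, level) pair: comp_time holds the median
 * sample transfer time, and dcomp_time half the interquartile range. */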
struct bench_result {
	const struct compression_range *rng;
	int level;
	float comp_time, dcomp_time;
};

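/* qsort comparator: floats in ascending order */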
static int float_compare(const void *a, const void *b)
{
	float va = *(const float *)a;
	float vb = *(const float *)b;
	if (va < vb)
		return -1;
	if (va > vb)
		return 1;
	return 0;
}
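/* qsort comparator: benchmark results by ascending median transfer time */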
static int compare_bench_result(const void *a, const void *b)
{
	const struct bench_result *va = (const struct bench_result *)a;
	const struct bench_result *vb = (const struct bench_result *)b;
	if (va->comp_time < vb->comp_time)
		return -1;
	if (va->comp_time > vb->comp_time)
		return 1;
	return 0;
}

struct diff_comp_results {
	/* Compressed packet size, in bytes */
	float packet_size;
	/* Time to construct compressed diff, in seconds */
	float diffcomp_time;
	/* Diff size / buffer size */
	float diff_frac;
	/* Compressed size / original size */
	float comp_frac;
};

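/* Lexicographically compare two timespecs; returns -1, 0, or 1 */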
static int compare_timespec(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec != b->tv_sec)
		return a->tv_sec < b->tv_sec ? -1 : 1;
	if (a->tv_nsec != b->tv_nsec)
		return a->tv_nsec < b->tv_nsec ? -1 : 1;
	return 0;
}

/* requires delta_ns >= 0 */
static struct timespec timespec_add(struct timespec base, int64_t delta_ns)
{
	struct timespec ret;
	ret.tv_sec = base.tv_sec + delta_ns / 1000000000LL;
	ret.tv_nsec = base.tv_nsec + delta_ns % 1000000000LL;
	if (ret.tv_nsec >= 1000000000LL) {
		ret.tv_nsec -= 1000000000LL;
		ret.tv_sec++;
	}
	return ret;
}

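/* Difference a - b, in nanoseconds */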
static int64_t timespec_sub(struct timespec a, struct timespec b)
{
	return (a.tv_sec - b.tv_sec) * 1000000000LL + (a.tv_nsec - b.tv_nsec);
}

/* Number of timing samples collected per compression setting */
#define NSAMPLES 5

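/** Measure, for a single compression mode and level, how long it takes to
 * compute, compress, and "send" a damage diff over a link with the given
 * bandwidth. Reports the median over NSAMPLES runs, with half the
 * interquartile range as the spread estimate. */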
static struct bench_result run_sub_bench(bool first,
		const struct compression_range *rng, int level,
		float bandwidth_mBps, int n_worker_threads, unsigned int seed,
		bool text_like, size_t test_size, void *image)
{
	/* Reset seed, so that all random image
	 * perturbations are consistent between runs */
	srand(seed);

	/* Setup a shadow structure */
	struct thread_pool pool;
	setup_thread_pool(&pool, rng->mode, level, n_worker_threads);
	if (first) {
		printf("Running compression level benchmarks, assuming bandwidth=%g MB/s, with %d threads\n",
				bandwidth_mBps, pool.nthreads);
	}

	struct fd_translation_map map;
	setup_translation_map(&map, false);

	struct wmsg_open_file file_msg;
	file_msg.remote_id = 0;
	file_msg.file_size = (uint32_t)test_size;
	file_msg.size_and_type = transfer_header(
			sizeof(struct wmsg_open_file), WMSG_OPEN_FILE);

	struct render_data render;
	memset(&render, 0, sizeof(render));
	render.disabled = true;
	render.drm_fd = 1;
	render.av_disabled = true;

	struct bytebuf msg = {.size = sizeof(struct wmsg_open_file),
			.data = (char *)&file_msg};
	(void)apply_update(&map, &pool, &render, WMSG_OPEN_FILE, 0, &msg);
	struct shadow_fd *sfd = get_shadow_for_rid(&map, 0);

	int iter = 0;
	float samples[NSAMPLES];
	float diff_frac[NSAMPLES], comp_frac[NSAMPLES];
	for (; !shutdown_flag && iter < NSAMPLES; iter++) {

		/* Reset image state */
		memcpy(sfd->mem_local, image, test_size);
		memcpy(sfd->mem_mirror, image, test_size);
		perturb(sfd->mem_local, test_size);
		sfd->is_dirty = true;
		damage_everything(&sfd->damage);

		/* Create transfer queue */
		struct transfer_queue transfer_data;
		memset(&transfer_data, 0, sizeof(struct transfer_queue));
		pthread_mutex_init(&transfer_data.async_recv_queue.lock, NULL);

		struct timespec t0, t1;
		clock_gettime(CLOCK_REALTIME, &t0);
		collect_update(&pool, sfd, &transfer_data, false);
		start_parallel_work(&pool, &transfer_data.async_recv_queue);

		/* A restricted main loop, in which transfer blocks are
		 * instantaneously consumed when previous blocks have been
		 * 'sent' */
		struct timespec next_write_time = {.tv_sec = 0, .tv_nsec = 0};
		size_t total_wire_size = 0;
		size_t net_diff_size = 0;
		while (1) {
			uint8_t flush[64];
			(void)read(pool.selfpipe_r, flush, sizeof(flush));

			/* Run tasks on main thread, just like the main loop */
			bool done = false;
			struct task_data task;
			bool has_task = request_work_task(&pool, &task, &done);
			if (has_task) {
				run_task(&task, &pool.threads[0]);

				pthread_mutex_lock(&pool.work_mutex);
				pool.tasks_in_progress--;
				pthread_mutex_unlock(&pool.work_mutex);
			}

			struct timespec cur_time;
			clock_gettime(CLOCK_REALTIME, &cur_time);
			if (compare_timespec(&next_write_time, &cur_time) < 0) {
				transfer_load_async(&transfer_data);
				if (transfer_data.start < transfer_data.end) {
					struct iovec v = transfer_data.vecs
							[transfer_data.start++];
					float delay_s = (float)v.iov_len /
							(bandwidth_mBps * 1e6f);
					total_wire_size += v.iov_len;
					/* Only one message type will be
					 * produced for diffs */
					struct wmsg_buffer_diff *header =
							v.iov_base;
					net_diff_size += (size_t)(header->diff_size +
								  header->ntrailing);

					/* Advance timer for next receipt */
					int64_t delay_ns = (int64_t)(delay_s *
								     1e9f);
					next_write_time = timespec_add(
							cur_time, delay_ns);
				}
			} else {
				/* Very short delay, for poll loop */
				bool tasks_remaining = false;
				pthread_mutex_lock(&pool.work_mutex);
				tasks_remaining = pool.stack_count > 0;
				pthread_mutex_unlock(&pool.work_mutex);

				struct timespec delay_time;
				delay_time.tv_sec = 0;
				delay_time.tv_nsec = 10000;
				if (!tasks_remaining) {
					int64_t nsecs_left = timespec_sub(
							next_write_time,
							cur_time);
					/* Clamp so that tv_nsec stays in
					 * nanosleep's valid range */
					if (nsecs_left > 999999999LL) {
						nsecs_left = 999999999LL;
					}
					if (nsecs_left > delay_time.tv_nsec) {
						delay_time.tv_nsec = nsecs_left;
					}
				}
				nanosleep(&delay_time, NULL);
			}
			bool all_sent = transfer_data.start ==
					transfer_data.end;

			if (done && all_sent) {
				break;
			}
		}

		finish_update(sfd);
		cleanup_transfer_queue(&transfer_data);
		clock_gettime(CLOCK_REALTIME, &t1);

		struct diff_comp_results r;
		r.packet_size = (float)total_wire_size;
		r.diffcomp_time = 1.0f * (float)(t1.tv_sec - t0.tv_sec) +
				  1e-9f * (float)(t1.tv_nsec - t0.tv_nsec);
		r.comp_frac = r.packet_size / (float)net_diff_size;
		r.diff_frac = (float)net_diff_size / (float)test_size;

		samples[iter] = r.diffcomp_time;
		diff_frac[iter] = r.diff_frac;
		comp_frac[iter] = r.comp_frac;
	}

	/* Cleanup sfd and helper structures */
	cleanup_thread_pool(&pool);
	cleanup_translation_map(&map);

	if (iter == 0) {
		/* Interrupted before any sample was collected */
		struct bench_result none = {.rng = rng, .level = level};
		return none;
	}

	qsort(samples, (size_t)iter, sizeof(float), float_compare);
	qsort(diff_frac, (size_t)iter, sizeof(float), float_compare);
	qsort(comp_frac, (size_t)iter, sizeof(float), float_compare);
	/* Using order statistics, because moment statistics a) require
	 * libm; b) don't work well with outliers. */
	float median = samples[iter / 2];
	float hiqr = (samples[(iter * 3) / 4] - samples[iter / 4]) / 2;
	float dmedian = diff_frac[iter / 2];
	float dhiqr = (diff_frac[(iter * 3) / 4] - diff_frac[iter / 4]) / 2;
	float cmedian = comp_frac[iter / 2];
	float chiqr = (comp_frac[(iter * 3) / 4] - comp_frac[iter / 4]) / 2;

	struct bench_result res;
	res.rng = rng;
	res.level = level;
	printf("%s, %s=%d: transfer %f+/-%f sec, diff %f+/-%f, comp %f+/-%f\n",
			text_like ? "txt" : "img", rng->desc, level, median,
			hiqr, dmedian, dhiqr, cmedian, chiqr);

	res.comp_time = median;
	res.dcomp_time = hiqr;
	return res;
}

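/** Benchmark every available compression mode and level on synthetic
 * text-like and video-like images of test_size bytes, assuming the given
 * link bandwidth (in MB/sec), and report the setting with the lowest median
 * transfer time for each type of content. */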
int run_bench(float bandwidth_mBps, uint32_t test_size, int n_worker_threads)
{
	/* 4MB test image - 1024x1024x4. Any smaller, and unrealistic caching
	 * speedups may occur */
	struct timespec tp;
	clock_gettime(CLOCK_REALTIME, &tp);

	srand((unsigned int)tp.tv_nsec);
	void *text_image = create_text_like_image(test_size);
	void *vid_image = create_video_like_image(test_size);
	if (!text_image || !vid_image) {
		free(text_image);
		free(vid_image);
		wp_error("Failed to allocate test images");
		return EXIT_FAILURE;
	}

	/* Store one result per (mode, level) combination, then sort the
	 * results to find the best setting */
	int ntests = 0;
	for (size_t c = 0; c < sizeof(comp_ranges) / sizeof(comp_ranges[0]);
			c++) {
		ntests += comp_ranges[c].max_val - comp_ranges[c].min_val + 1;
	}

	/* For a given type of content, the preferred mode is generally
	 * consistent */

	struct bench_result *tresults =
			calloc((size_t)ntests, sizeof(struct bench_result));
	struct bench_result *iresults =
			calloc((size_t)ntests, sizeof(struct bench_result));
	int ntres = 0, nires = 0;
	for (int k = 0; k < 2; k++) {
		bool text_like = k == 0;
		int j = 0;
		for (size_t c = 0;
				!shutdown_flag &&
				c < sizeof(comp_ranges) / sizeof(comp_ranges[0]);
				c++) {
			for (int lvl = comp_ranges[c].min_val;
					!shutdown_flag &&
					lvl <= comp_ranges[c].max_val;
					lvl++) {

				struct bench_result res = run_sub_bench(j == 0,
						&comp_ranges[c], lvl,
						bandwidth_mBps,
						n_worker_threads,
						(unsigned int)tp.tv_nsec,
						text_like, test_size,
						text_like ? text_image
							  : vid_image);
				if (text_like) {
					tresults[j++] = res;
					ntres++;
				} else {
					iresults[j++] = res;
					nires++;
				}
			}
		}
	}
	for (int k = 0; k < 2; k++) {
		bool text_like = k == 0;
		struct bench_result *results = text_like ? tresults : iresults;
		int nr = text_like ? ntres : nires;
		if (nr <= 0) {
			continue;
		}

		/* Print best recommendation */
		qsort(results, (size_t)nr, sizeof(struct bench_result),
				compare_bench_result);

		struct bench_result best = results[0];
		printf("%s, best compression level: \"%s=%d\", with %f+/-%f sec for sample transfer\n",
				text_like ? "Text heavy image"
					  : "Photo-like image",
				best.rng->desc, best.level, best.comp_time,
				best.dcomp_time);
	}
	free(tresults);
	free(iresults);

	free(vid_image);
	free(text_image);
	return EXIT_SUCCESS;
}