1 /*
2 * Copyright © 2019 Manuel Stoeckl
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial
14 * portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25
26 #include "shadow.h"
27 #include "util.h"
28
29 #include <fcntl.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <time.h>
34 #include <unistd.h>
35
/** An inclusive range of levels supported by one compression mode. */
struct compression_range {
	enum compression_mode mode;
	/* Lowest level to benchmark for this mode (0 when the mode has
	 * no tunable levels) */
	int min_val;
	/* Highest level to benchmark for this mode */
	int max_val;
	/* Human-readable mode name, used in benchmark output */
	const char *desc;
};
42
/* Table of the compression modes compiled into this build, with the
 * level range to sweep for each; entries are gated on build-time
 * availability of the corresponding library. */
static const struct compression_range comp_ranges[] = {
		{COMP_NONE, 0, 0, "none"},
#ifdef HAS_LZ4
		{COMP_LZ4, -10, 16, "lz4"},
#endif
#ifdef HAS_ZSTD
		{COMP_ZSTD, -10, 22, "zstd"},
#endif
};
52
/** Allocate and fill a `size`-byte buffer with a deterministic pattern
 * of alternating byte runs (runs of 0xfe/0x00 with slowly drifting run
 * lengths), loosely mimicking rendered text content.
 *
 * Returns NULL on allocation failure; otherwise the caller owns the
 * buffer and must free() it. */
static void *create_text_like_image(size_t size)
{
	uint8_t *data = malloc(size);
	if (!data) {
		return NULL;
	}
	for (size_t i = 0; i < size; i++) {
		/* `step` increases slowly and non-uniformly with i; its
		 * parity selects between a light and a dark run */
		size_t step = i / 203 - i / 501;
		bool s = step % 2 == 0;
		data[i] = (uint8_t)(s ? ((step >> 1) & 0x2) + 0xfe : 0x00);
	}
	return data;
}
/** Allocate and fill a `size`-byte buffer with a pseudo-random pattern
 * mimicking photographic/video content: a slowly varying base sequence
 * with runs, plus one bit of per-byte noise ("fine grain").
 *
 * Uses rand(); output is deterministic only if the caller seeds the
 * PRNG with srand() beforehand. Returns NULL on allocation failure;
 * otherwise the caller owns the buffer and must free() it. */
static void *create_video_like_image(size_t size)
{
	uint8_t *data = malloc(size);
	if (!data) {
		return NULL;
	}
	for (size_t i = 0; i < size; i++) {
		/* primary sequence, with runs, but avoiding obvious
		 * repetition; then add fine grain, a main source of
		 * complexity in real images */
		uint32_t noise = (uint32_t)rand() % 2;
		data[i] = (uint8_t)(i + i / 101 + i / 33 + noise);
	}
	return data;
}
/** Create a shuffled variation of the original image, by reversing
 * the bytes of 50 randomly selected subranges in place. Consumes
 * exactly 100 rand() values, so results are reproducible for a fixed
 * srand() seed. */
static void perturb(void *data, size_t size)
{
	uint8_t *buf = (uint8_t *)data;
	for (int pass = 0; pass < 50; pass++) {
		/* TODO: avoid redundant motion, and make this very fast */
		size_t lo = (size_t)rand() % size;
		size_t hi = (size_t)rand() % size;
		if (lo >= hi) {
			continue;
		}
		/* reverse buf[lo..hi] by swapping symmetric pairs */
		size_t half = (hi - lo) / 2;
		for (size_t k = 0; k < half; k++) {
			uint8_t tmp = buf[lo + k];
			buf[lo + k] = buf[hi - k];
			buf[hi - k] = tmp;
		}
	}
}
106
/** Outcome of a single (mode, level) benchmark run. */
struct bench_result {
	/* The compression mode whose level range was being tested */
	const struct compression_range *rng;
	/* The specific compression level used */
	int level;
	/* Median transfer time and half-IQR spread, in seconds */
	float comp_time, dcomp_time;
};
112
/* qsort comparator for ascending order of floats */
static int float_compare(const void *a, const void *b)
{
	const float x = *(const float *)a;
	const float y = *(const float *)b;
	return (x < y) ? -1 : ((x > y) ? 1 : 0);
}
compare_bench_result(const void * a,const void * b)123 static int compare_bench_result(const void *a, const void *b)
124 {
125
126 const struct bench_result *va = (const struct bench_result *)a;
127 const struct bench_result *vb = (const struct bench_result *)b;
128 if (va->comp_time < vb->comp_time)
129 return -1;
130 if (va->comp_time > vb->comp_time)
131 return 1;
132 return 0;
133 }
134
/** Summary measurements from one diff+compress benchmark iteration. */
struct diff_comp_results {
	/* Compressed packet size, in bytes */
	float packet_size;
	/* Time to construct compressed diff, in seconds */
	float diffcomp_time;
	/* Diff size / buffer size */
	float diff_frac;
	/* Compressed size / original size */
	float comp_frac;
};
145
/* Three-way comparison of two timespecs: -1 if a < b, 1 if a > b,
 * 0 if equal. Seconds dominate; nanoseconds break ties. */
static int compare_timespec(const struct timespec *a, const struct timespec *b)
{
	if (a->tv_sec < b->tv_sec) {
		return -1;
	}
	if (a->tv_sec > b->tv_sec) {
		return 1;
	}
	if (a->tv_nsec < b->tv_nsec) {
		return -1;
	}
	if (a->tv_nsec > b->tv_nsec) {
		return 1;
	}
	return 0;
}
154
/* Add a number of nanoseconds to a timespec; requires delta_ns >= 0
 * and a normalized base (0 <= base.tv_nsec < 1e9). The result is
 * normalized so that 0 <= ret.tv_nsec < 1000000000. */
static struct timespec timespec_add(struct timespec base, int64_t delta_ns)
{
	struct timespec ret;
	ret.tv_sec = base.tv_sec + delta_ns / 1000000000LL;
	ret.tv_nsec = base.tv_nsec + delta_ns % 1000000000LL;
	/* Both addends are < 1e9, so the sum is < 2e9 and at most one
	 * carry is needed. Use >=, not >: with the old strict comparison
	 * a sum of exactly 1e9 was left unnormalized (tv_nsec == 1e9),
	 * which violates the timespec invariant and breaks subsequent
	 * comparisons. */
	if (ret.tv_nsec >= 1000000000LL) {
		ret.tv_nsec -= 1000000000LL;
		ret.tv_sec++;
	}
	return ret;
}
167
/* Difference a - b, in nanoseconds (negative when a precedes b) */
static int64_t timespec_sub(struct timespec a, struct timespec b)
{
	int64_t sec_diff = (int64_t)(a.tv_sec - b.tv_sec);
	int64_t nsec_diff = (int64_t)(a.tv_nsec - b.tv_nsec);
	return sec_diff * 1000000000LL + nsec_diff;
}
172
/* Number of timed iterations per (mode, level) pair; order statistics
 * (median, half-IQR) are computed over these samples */
#define NSAMPLES 5

/** Benchmark a single (compression mode, level) pair: repeatedly
 * perturb a synthetic image, produce a compressed diff via the shadow
 * machinery, and simulate sending it over a link of the given
 * bandwidth. Prints per-pair statistics and returns the median timing
 * result.
 *
 * first: when true, print the benchmark banner once
 * rng/level: the compression mode and level under test
 * bandwidth_mBps: simulated link bandwidth, in MB/sec
 * n_worker_threads: requested thread pool size
 * seed: srand() seed, reused so every run perturbs identically
 * text_like: only affects the output label ("txt" vs "img")
 * test_size: image size in bytes
 * image: baseline image contents (test_size bytes, not modified) */
static struct bench_result run_sub_bench(bool first,
		const struct compression_range *rng, int level,
		float bandwidth_mBps, int n_worker_threads, unsigned int seed,
		bool text_like, size_t test_size, void *image)
{
	/* Reset seed, so that all random image
	 * perturbations are consistent between runs */
	srand(seed);

	/* Setup a shadow structure */
	struct thread_pool pool;
	setup_thread_pool(&pool, rng->mode, level, n_worker_threads);
	if (first) {
		printf("Running compression level benchmarks, assuming bandwidth=%g MB/s, with %d threads\n",
				bandwidth_mBps, pool.nthreads);
	}

	struct fd_translation_map map;
	setup_translation_map(&map, false);

	/* Register a fake remote file of test_size bytes (rid 0) so
	 * that a shadow_fd with local/mirror buffers is created */
	struct wmsg_open_file file_msg;
	file_msg.remote_id = 0;
	file_msg.file_size = (uint32_t)test_size;
	file_msg.size_and_type = transfer_header(
			sizeof(struct wmsg_open_file), WMSG_OPEN_FILE);

	struct render_data render;
	memset(&render, 0, sizeof(render));
	render.disabled = true;
	render.drm_fd = 1;
	render.av_disabled = true;

	struct bytebuf msg = {.size = sizeof(struct wmsg_open_file),
			.data = (char *)&file_msg};
	(void)apply_update(&map, &pool, &render, WMSG_OPEN_FILE, 0, &msg);
	struct shadow_fd *sfd = get_shadow_for_rid(&map, 0);

	int iter = 0;
	float samples[NSAMPLES];
	float diff_frac[NSAMPLES], comp_frac[NSAMPLES];
	for (; !shutdown_flag && iter < NSAMPLES; iter++) {

		/* Reset image state */
		memcpy(sfd->mem_local, image, test_size);
		memcpy(sfd->mem_mirror, image, test_size);
		perturb(sfd->mem_local, test_size);
		sfd->is_dirty = true;
		damage_everything(&sfd->damage);

		/* Create transfer queue */
		struct transfer_queue transfer_data;
		memset(&transfer_data, 0, sizeof(struct transfer_queue));
		pthread_mutex_init(&transfer_data.async_recv_queue.lock, NULL);

		struct timespec t0, t1;
		clock_gettime(CLOCK_REALTIME, &t0);
		collect_update(&pool, sfd, &transfer_data, false);
		start_parallel_work(&pool, &transfer_data.async_recv_queue);

		/* A restricted main loop, in which transfer blocks are
		 * instantaneously consumed when previous blocks have been
		 * 'sent' */
		struct timespec next_write_time = {.tv_sec = 0, .tv_nsec = 0};
		size_t total_wire_size = 0;
		size_t net_diff_size = 0;
		while (1) {
			/* Drain the thread pool's self-pipe notifications;
			 * return value intentionally ignored */
			uint8_t flush[64];
			(void)read(pool.selfpipe_r, flush, sizeof(flush));

			/* Run tasks on main thread, just like the main loop */
			bool done = false;
			struct task_data task;
			bool has_task = request_work_task(&pool, &task, &done);
			if (has_task) {
				run_task(&task, &pool.threads[0]);

				pthread_mutex_lock(&pool.work_mutex);
				pool.tasks_in_progress--;
				pthread_mutex_unlock(&pool.work_mutex);
			}

			struct timespec cur_time;
			clock_gettime(CLOCK_REALTIME, &cur_time);
			if (compare_timespec(&next_write_time, &cur_time) < 0) {
				/* The simulated link is idle: 'send' the next
				 * queued transfer block, and schedule when the
				 * link will next be free based on bandwidth */
				transfer_load_async(&transfer_data);
				if (transfer_data.start < transfer_data.end) {
					struct iovec v =
							transfer_data.vecs
									[transfer_data.start++];
					float delay_s = (float)v.iov_len /
							(bandwidth_mBps * 1e6f);
					total_wire_size += v.iov_len;
					/* Only one message type will be
					 * produced for diffs */
					struct wmsg_buffer_diff *header =
							v.iov_base;
					net_diff_size += (size_t)(header->diff_size +
								  header->ntrailing);

					/* Advance timer for next receipt */
					int64_t delay_ns = (int64_t)(delay_s *
								     1e9f);
					next_write_time = timespec_add(
							cur_time, delay_ns);
				}
			} else {
				/* Very short delay, for poll loop */
				bool tasks_remaining = false;
				pthread_mutex_lock(&pool.work_mutex);
				tasks_remaining = pool.stack_count > 0;
				pthread_mutex_unlock(&pool.work_mutex);

				struct timespec delay_time;
				delay_time.tv_sec = 0;
				delay_time.tv_nsec = 10000;
				if (!tasks_remaining) {
					/* No compression work pending: sleep
					 * until the link frees up, capped at
					 * one second */
					int64_t nsecs_left = timespec_sub(
							next_write_time,
							cur_time);
					if (nsecs_left > 1000000000LL) {
						nsecs_left = 1000000000LL;
					}
					if (nsecs_left > delay_time.tv_nsec) {
						delay_time.tv_nsec = nsecs_left;
					}
				}
				nanosleep(&delay_time, NULL);
			}
			/* Finished when all worker tasks are done and every
			 * queued transfer block has been 'sent' */
			bool all_sent = false;
			all_sent = transfer_data.start == transfer_data.end;

			if (done && all_sent) {
				break;
			}
		}

		finish_update(sfd);
		cleanup_transfer_queue(&transfer_data);
		clock_gettime(CLOCK_REALTIME, &t1);

		struct diff_comp_results r;
		r.packet_size = (float)total_wire_size;
		r.diffcomp_time = 1.0f * (float)(t1.tv_sec - t0.tv_sec) +
				  1e-9f * (float)(t1.tv_nsec - t0.tv_nsec);
		r.comp_frac = r.packet_size / (float)net_diff_size;
		r.diff_frac = (float)net_diff_size / (float)test_size;

		samples[iter] = r.diffcomp_time;
		diff_frac[iter] = r.diff_frac;
		comp_frac[iter] = r.comp_frac;
	}

	/* Cleanup sfd and helper structures */
	cleanup_thread_pool(&pool);
	cleanup_translation_map(&map);

	/* Sort only the `iter` samples actually collected (fewer than
	 * NSAMPLES if a shutdown was requested mid-run) */
	qsort(samples, (size_t)iter, sizeof(float), float_compare);
	qsort(diff_frac, (size_t)iter, sizeof(float), float_compare);
	qsort(comp_frac, (size_t)iter, sizeof(float), float_compare);
	/* Using order statistics, because moment statistics a) require
	 * libm; b) don't work well with outliers. */
	float median = samples[iter / 2];
	float hiqr = (samples[(iter * 3) / 4] - samples[iter / 4]) / 2;
	float dmedian = diff_frac[iter / 2];
	float dhiqr = (diff_frac[(iter * 3) / 4] - diff_frac[iter / 4]) / 2;
	float cmedian = comp_frac[iter / 2];
	float chiqr = (comp_frac[(iter * 3) / 4] - comp_frac[iter / 4]) / 2;

	struct bench_result res;
	res.rng = rng;
	res.level = level;
	printf("%s, %s=%d: transfer %f+/-%f sec, diff %f+/-%f, comp %f+/-%f\n",
			text_like ? "txt" : "img", rng->desc, level, median,
			hiqr, dmedian, dhiqr, cmedian, chiqr);

	res.comp_time = median;
	res.dcomp_time = hiqr;
	return res;
}
354
run_bench(float bandwidth_mBps,uint32_t test_size,int n_worker_threads)355 int run_bench(float bandwidth_mBps, uint32_t test_size, int n_worker_threads)
356 {
357 /* 4MB test image - 1024x1024x4. Any smaller, and unrealistic caching
358 * speedups may occur */
359 struct timespec tp;
360 clock_gettime(CLOCK_REALTIME, &tp);
361
362 srand((unsigned int)tp.tv_nsec);
363 void *text_image = create_text_like_image(test_size);
364 void *vid_image = create_video_like_image(test_size);
365 if (!text_image || !vid_image) {
366 free(text_image);
367 free(vid_image);
368 wp_error("Failed to allocate test images");
369 return EXIT_FAILURE;
370 }
371
372 /* Q: store an array of all the modes -> outputs */
373 // Then sort that array
374 int ntests = 0;
375 for (size_t c = 0; c < sizeof(comp_ranges) / sizeof(comp_ranges[0]);
376 c++) {
377 ntests += comp_ranges[c].max_val - comp_ranges[c].min_val + 1;
378 }
379
380 /* For the content, the mode is generally consistent */
381
382 struct bench_result *tresults =
383 calloc((size_t)ntests, sizeof(struct bench_result));
384 struct bench_result *iresults =
385 calloc((size_t)ntests, sizeof(struct bench_result));
386 int ntres = 0, nires = 0;
387 for (int k = 0; k < 2; k++) {
388 bool text_like = k == 0;
389 int j = 0;
390 for (size_t c = 0;
391 !shutdown_flag &&
392 c < sizeof(comp_ranges) / sizeof(comp_ranges[0]);
393 c++) {
394 for (int lvl = comp_ranges[c].min_val;
395 !shutdown_flag &&
396 lvl <= comp_ranges[c].max_val;
397 lvl++) {
398
399 struct bench_result res = run_sub_bench(j == 0,
400 &comp_ranges[c], lvl,
401 bandwidth_mBps,
402 n_worker_threads,
403 (unsigned int)tp.tv_nsec,
404 text_like, test_size,
405 text_like ? text_image
406 : vid_image);
407 if (text_like) {
408 tresults[j++] = res;
409 ntres++;
410 } else {
411 iresults[j++] = res;
412 nires++;
413 }
414 }
415 }
416 }
417 for (int k = 0; k < 2; k++) {
418 bool text_like = k == 0;
419 struct bench_result *results = text_like ? tresults : iresults;
420 int nr = text_like ? ntres : nires;
421 if (nr <= 0) {
422 continue;
423 }
424
425 /* Print best recommendation */
426 qsort(results, (size_t)nr, sizeof(struct bench_result),
427 compare_bench_result);
428
429 struct bench_result best = results[0];
430 printf("%s, best compression level: \"%s=%d\", with %f+/-%f sec for sample transfer\n",
431 text_like ? "Text heavy image"
432 : "Photo-like image",
433 best.rng->desc, best.level, best.comp_time,
434 best.dcomp_time);
435 }
436 free(tresults);
437 free(iresults);
438
439 free(vid_image);
440 free(text_image);
441 return EXIT_SUCCESS;
442 }
443