1 /*
2 * Copyright © 2019 Manuel Stoeckl
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the
13 * next paragraph) shall be included in all copies or substantial
14 * portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 */
25
26 #include "common.h"
27 #include "shadow.h"
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33
/**
 * Overwrite parts of `data` (`size` bytes) using values drawn from rand(),
 * so the resulting pattern is determined by the caller's srand() seed.
 *
 * `max_run` selects the pattern:
 *   -1  the whole buffer is set to one byte value taken from rand()
 *   -2  the whole buffer is zeroed
 *   else  the buffer is walked front to back, alternating between a skipped
 *         stretch (bytes left as they were) and a stretch filled with one
 *         random byte value. Each stretch is `rand() % max_run` bytes long,
 *         clipped to the bytes remaining. Values below 2 are raised to 2.
 *
 * Returns 1 for mode -1, 0 for mode -2, and otherwise the number of
 * skip/fill iterations performed (iterations whose fill stretch happens to
 * be empty are counted as well).
 */
static int64_t rand_gap_fill(char *data, size_t size, int max_run)
{
	if (max_run == -1) {
		memset(data, rand(), size);
		return 1;
	}
	if (max_run == -2) {
		memset(data, 0, size);
		return 0;
	}

	/* With a modulus below 2 no stretch could ever be non-empty */
	const int modulus = max_run < 2 ? 2 : max_run;

	size_t pos = 0;
	int64_t nruns = 0;
	while (pos < size) {
		/* Stretch lengths stay in size_t so the remaining byte count
		 * is never narrowed to int, whatever the buffer size. */
		size_t skip = (size_t)(rand() % modulus);
		if (skip > size - pos) {
			skip = size - pos;
		}
		pos += skip;

		size_t fill = (size_t)(rand() % modulus);
		if (fill > size - pos) {
			fill = size - pos;
		}

		/* Drawn even when `fill` is zero, to keep the rand() sequence
		 * independent of clipping. */
		const int val = rand();
		memset(&data[pos], val, fill);
		pos += fill;
		nruns++;
	}
	return nruns;
}
60
/* Parameters describing one round-trip scenario. */
struct subtest {
	/* Number of bytes that are randomized, diffed and compared */
	size_t size;
	/* Forwarded to rand_gap_fill() as `max_run`; the values -1 and -2
	 * select its whole-buffer fill modes */
	int max_gap;
	/* Handed to srand() before the scenario starts */
	uint32_t seed;
	/* Number of consecutive pieces the byte range is split into; each
	 * piece is diffed and applied on its own */
	int shards;
};
67
68 static const struct subtest subtests[] = {
69 {256, 128, 0x11, 3},
70 {333333, 128, 0x11, 3},
71 {39, 2, 0x13, 17},
72 {10000000, 262144, 0x21, 1},
73 {4, 4, 0x41, 1},
74 {65537, 177, 0x51, 1},
75 {17777, 2, 0x61, 1},
76 {60005, 60005, 0x71, 1},
77 {1 << 16, -1, 0x71, 4},
78 {1 << 16, -2, 0x71, 4},
79 {1 << 24, -1, 0x71, 4},
80 {1 << 24, -2, 0x71, 4},
81 };
82
83 static const enum diff_type diff_types[5] = {
84 DIFF_AVX512F,
85 DIFF_AVX2,
86 DIFF_SSE3,
87 DIFF_NEON,
88 DIFF_C,
89 };
/*
 * Labels printed in the result line for each implementation; entry k
 * names diff_types[k].
 */
static const char *diff_names[5] = {
	[0] = "avx512",
	[1] = "avx2 ",
	[2] = "sse3 ",
	[3] = "neon ",
	[4] = "plainC",
};
97
/* Nanoseconds elapsed between two clock_gettime() samples. */
static uint64_t subtest_elapsed_ns(
		const struct timespec *from, const struct timespec *to)
{
	return (uint64_t)((to->tv_sec - from->tv_sec) * 1000000000LL +
			  (to->tv_nsec - from->tv_nsec));
}

/* Print the positions at which target1 or mirror disagree with source,
 * stopping once more than 300 have been listed. */
static void subtest_dump_mismatch(const char *target1, const char *mirror,
		const char *source, size_t size)
{
	printf("Failed to synchronize\n");
	int listed = 0;
	for (size_t k = 0; k < size; k++) {
		if (target1[k] == source[k] && mirror[k] == source[k]) {
			continue;
		}
		if (listed > 300) {
			printf("and still more differences\n");
			break;
		}
		printf("i %d: target1 %02x mirror %02x source %02x\n",
				(int)k, (uint8_t)target1[k],
				(uint8_t)mirror[k], (uint8_t)source[k]);
		listed++;
	}
}

/**
 * Run one scenario with one diff implementation and print a timing line.
 *
 * The generator is seeded from test.seed and the first test.size bytes of
 * mirror, target1 and target2 are zeroed. Each repetition then re-randomizes
 * `source` with rand_gap_fill(), and for every shard builds a diff of that
 * shard's (alignment-rounded) interval into `diff`, appends the trailing
 * part on the last shard, and applies the result via apply_diff(). After all
 * shards, target1 must equal source byte for byte; on the first mismatch the
 * differences are printed and no further repetitions are run.
 *
 * i              index of the scenario, used only in the printed line
 * test           scenario parameters
 * diff           scratch buffer receiving the generated diff
 * source         buffer that gets randomized
 * mirror         buffer passed to the diff construction alongside source
 * target1/2      buffers handed to apply_diff()
 * diff_fn        implementation forwarded to construct_diff_core()
 * alignment_bits log2 of the alignment that shard boundaries are rounded
 *                down to
 * diff_name      label printed at the start of the result line
 *
 * Returns true when every repetition ended with target1 matching source.
 */
static bool run_subtest(int i, const struct subtest test, char *diff,
		char *source, char *mirror, char *target1, char *target2,
		interval_diff_fn_t diff_fn, int alignment_bits,
		const char *diff_name)
{
	srand((uint32_t)test.seed);
	memset(mirror, 0, test.size);
	memset(target1, 0, test.size);
	memset(target2, 0, test.size);

	/* Fewer repetitions for larger scenarios, between 1 and 100 */
	const int roughtime = (int)test.size + test.shards * 500;
	const int repetitions = min(100, max(1000000000 / roughtime, 1));

	uint64_t create_ns = 0;
	uint64_t apply_ns = 0;
	int64_t nruns = 0;
	size_t net_diffsize = 0;
	bool all_success = true;

	for (int rep = 0; rep < repetitions && all_success; rep++) {
		nruns += rand_gap_fill(source, test.size, test.max_gap);

		/* Only the last repetition's total ends up in the report */
		net_diffsize = 0;
		for (int s = 0; s < test.shards; s++) {
			const int alignment = 1 << alignment_bits;
			const bool last_shard = (s == test.shards - 1);

			struct interval damage;
			damage.start = split_interval(
					0, (int)test.size, test.shards, s);
			damage.end = split_interval(
					0, (int)test.size, test.shards, s + 1);
			/* Round both ends down to a multiple of the alignment */
			damage.start = alignment * (damage.start / alignment);
			damage.end = alignment * (damage.end / alignment);

			struct timespec before_create, after_create, after_apply;
			clock_gettime(CLOCK_MONOTONIC, &before_create);
			size_t diffsize = 0;
			if (damage.start < damage.end) {
				diffsize = construct_diff_core(diff_fn,
						alignment_bits, &damage, 1,
						mirror, source, diff);
			}
			size_t ntrailing = 0;
			if (last_shard) {
				ntrailing = construct_diff_trailing(test.size,
						alignment_bits, mirror, source,
						diff + diffsize);
			}
			clock_gettime(CLOCK_MONOTONIC, &after_create);
			apply_diff(test.size, target1, target2, diffsize,
					ntrailing, diff);
			clock_gettime(CLOCK_MONOTONIC, &after_apply);

			create_ns += subtest_elapsed_ns(
					&before_create, &after_create);
			apply_ns += subtest_elapsed_ns(
					&after_create, &after_apply);
			net_diffsize += diffsize + ntrailing;
		}

		if (memcmp(target1, source, test.size)) {
			subtest_dump_mismatch(
					target1, mirror, source, test.size);
			all_success = false;
		}
	}

	/* Normalization uses the planned repetition count, also when the
	 * loop stopped early. nruns is 0 when rand_gap_fill() only ever ran in
	 * its zero-fill mode, in which case the bytes/run quotient is not
	 * finite (inf under IEEE 754 arithmetic). */
	const double total_bytes = (double)repetitions * (double)test.size;
	const double scale = 1.0 / total_bytes;
	printf("%s #%2d, : %6.3f,%6.3f,%6.3f ns/byte create,apply,net (%d/%d@%d), %.1f bytes/run\n",
			diff_name, i, (double)create_ns * scale,
			(double)apply_ns * scale,
			(double)(create_ns + apply_ns) * scale,
			(int)net_diffsize, (int)test.size, test.shards,
			total_bytes / (double)nruns);
	return all_success;
}
189
190 log_handler_func_t log_funcs[2] = {test_log_handler, test_log_handler};
main(int argc,char ** argv)191 int main(int argc, char **argv)
192 {
193 (void)argc;
194 (void)argv;
195
196 bool all_success = true;
197
198 const int nsubtests = (sizeof(subtests) / sizeof(subtests[0]));
199 for (int i = 0; i < nsubtests; i++) {
200 struct subtest test = subtests[i];
201
202 /* Use maximum alignment */
203 const size_t bufsize = alignz(test.size + 8 + 64, 64);
204 char *diff = aligned_alloc(64, bufsize);
205 char *source = aligned_alloc(64, bufsize);
206 char *mirror = aligned_alloc(64, bufsize);
207 char *target1 = aligned_alloc(64, bufsize);
208 char *target2 = aligned_alloc(64, bufsize);
209 const int ntypes = sizeof(diff_types) / sizeof(diff_types[0]);
210 for (int a = 0; a < ntypes; a++) {
211 int alignment_bits;
212 interval_diff_fn_t diff_fn = get_diff_function(
213 diff_types[a], &alignment_bits);
214 if (!diff_fn) {
215 continue;
216 }
217 all_success &= run_subtest(i, test, diff, source,
218 mirror, target1, target2, diff_fn,
219 alignment_bits, diff_names[a]);
220 }
221 free(diff);
222 free(source);
223 free(mirror);
224 free(target1);
225 free(target2);
226 }
227
228 return all_success ? EXIT_SUCCESS : EXIT_FAILURE;
229 }
230