/*
 * Copyright © 2019 Manuel Stoeckl
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "shadow.h"

#include <stdlib.h>
#include <string.h>

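/* Bookkeeping for the bottom-up mergesort below. A `merge_stack_elem`
 * identifies one sorted run inside a shared interval buffer: `offset` is the
 * index of its first interval and `count` the number of intervals in the run,
 * excluding the trailing sentinel. A `merge_stack` is the buffer itself:
 * `data` holds the intervals, `size` is the allocated capacity, and `count`
 * the number of entries currently in use. */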
struct merge_stack_elem {
	int offset;
	int count;
};
struct merge_stack {
	struct interval *data;
	int size;
	int count;
};

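/* Merge two sorted, sentinel-terminated interval lists `a_list` and `b_list`
 * into `c_list`, coalescing intervals whose gap is smaller than `margin`;
 * for example, with margin 4, inputs {0,10} and {12,20} coalesce into {0,20}.
 * Returns the number of intervals written, not counting the sentinel that is
 * appended to `c_list`. */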
static int stream_merge(int a_count, const struct interval *__restrict__ a_list,
		int b_count, const struct interval *__restrict__ b_list,
		struct interval *__restrict__ c_list, int margin)
{
	int ia = 0, ib = 0, ic = 0;
	int cursor = INT32_MIN;
	(void)a_count;
	(void)b_count;

	/* the loop exit condition appears to be faster than checking
	 * ia<a_count||ib<b_count */
	while (!(a_list[ia].start == INT32_MAX &&
			b_list[ib].start == INT32_MAX)) {
		/* TODO: write a simd optimized version, in (?) kernel_sse.c,
		 * that selects 4 elements at a time. Sentinels are free,
		 * after all */
		struct interval sel;
		if (a_list[ia].start < b_list[ib].start) {
			sel = a_list[ia++];
		} else {
			sel = b_list[ib++];
		}

		/* which path is more likely depends on the structure of
		 * the result; branch prediction works very well here */
		int new_cursor = max(cursor, sel.end);
		if (sel.start >= cursor + margin) {
			c_list[ic++] = sel;
		} else {
			c_list[ic - 1].end = new_cursor;
		}
		cursor = new_cursor;
	}

	/* add end sentinel */
	c_list[ic] = (struct interval){.start = INT32_MAX, .end = INT32_MAX};

	return ic;
}

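/* Restore the invariant that each run on the merge stack is at most half the
 * size of the run beneath it, by repeatedly merging the top two runs (with
 * `force_compact`, keep merging until a single run remains). Each merge is
 * written through `temp` and copied back into `base`; `*absorbed` accumulates
 * the number of intervals eliminated by merging. Returns the new stack size. */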
static int fix_merge_stack_property(int size, struct merge_stack_elem *stack,
		struct merge_stack *base, struct merge_stack *temp,
		int merge_margin, bool force_compact, int *absorbed)
{
	while (size > 1) {
		struct merge_stack_elem top = stack[size - 1];
		struct merge_stack_elem nxt = stack[size - 2];

		if (2 * top.count <= nxt.count && !force_compact) {
			return size;
		}

		if (buf_ensure_size(top.count + nxt.count + 1,
				    sizeof(struct interval), &temp->size,
				    (void **)&temp->data) == -1) {
			wp_error("Failed to resize a merge buffer, some damage intervals may be lost");
			return size;
		}

		int xs = stream_merge(top.count, &base->data[top.offset],
				nxt.count, &base->data[nxt.offset], temp->data,
				merge_margin);
		/* There are more complicated/multi-buffer alternatives with
		 * fewer memory copies, but this is already <20% of stream
		 * merge time */
		memcpy(&base->data[nxt.offset], temp->data,
				(size_t)(xs + 1) * sizeof(struct interval));
		base->count = nxt.offset + xs + 1;

		stack[size - 1] = (struct merge_stack_elem){
				.offset = 0, .count = 0};
		stack[size - 2] = (struct merge_stack_elem){
				.offset = nxt.offset, .count = xs};
		size--;

		*absorbed += (top.count + nxt.count - xs);
	}
	return size;
}

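/* Expand the extended interval `e` (a span of `width` repeated `rep` times
 * with period `stride`) into a flat list of intervals in `vec`, rounding each
 * interval outward to a multiple of 2^alignment_bits and coalescing rows that
 * touch or overlap. Appends an end sentinel and returns the number of
 * intervals written, excluding that sentinel. */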
static int unpack_ext_interval(struct interval *vec,
		const struct ext_interval e, int alignment_bits)
{
	int iw = 0;
	int last_end = INT32_MIN;
	for (int ir = 0; ir < e.rep; ir++) {
		int start = e.start + ir * e.stride;
		int end = start + e.width;
		start = (start >> alignment_bits) << alignment_bits;
		end = ((end + (1 << alignment_bits) - 1) >> alignment_bits)
		      << alignment_bits;

		if (start > last_end) {
			vec[iw].start = start;
			vec[iw].end = end;
			last_end = end;
			iw++;
		} else {
			vec[iw - 1].end = end;
			last_end = end;
		}
	}
	/* end sentinel */
	vec[iw] = (struct interval){.start = INT32_MAX, .end = INT32_MAX};
	return iw;
}

/* By writing a mergesort by hand, we can detect duplicates early.
 *
 * TODO: optimize output with run-length-encoded segments
 * TODO: explicit time limiting/adaptive margin! */
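/* Merge the `old_count` damage intervals in `old_list` with the `new_count`
 * extended intervals in `new_list`; the merged list is returned through
 * `dst_count` and `dst_list`. The old list's buffer is reused (and may be
 * reallocated), so callers should treat `old_list` as consumed. Interval
 * lists are kept sentinel-terminated throughout. */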
void merge_mergesort(const int old_count, struct interval *old_list,
		const int new_count, const struct ext_interval *const new_list,
		int *dst_count, struct interval **dst_list, int merge_margin,
		int alignment_bits)
{
	/* Stack-based mergesort: the buffer at position `i+1`
	 * should be <= 1/2 times the size of the buffer at
	 * position `i`; buffers will be merged
	 * to maintain this invariant */
	// TODO: improve memory management!
	struct merge_stack_elem substack[32];
	int substack_size = 0;
	memset(substack, 0, sizeof(substack));
	struct merge_stack base = {.data = NULL, .count = 0, .size = 0};
	struct merge_stack temp = {.data = NULL, .count = 0, .size = 0};
	if (old_count) {
		/* seed the stack with the previous damage
		 * interval list,
		 * including trailing terminator */
		base.data = old_list;
		base.size = old_count + 1;
		base.count = old_count + 1;
		substack[substack_size++] = (struct merge_stack_elem){
				.offset = 0, .count = old_count};
	}

	int src_count = 0, absorbed = 0;

	for (int jn = 0; jn < new_count; jn++) {
		struct ext_interval e = new_list[jn];
		/* ignore invalid intervals -- also, if e.start
		 * is close to INT32_MIN, the stream merge
		 * breaks */
		if (e.width <= 0 || e.rep <= 0 || e.start < 0) {
			continue;
		}

		/* To limit CPU time, if it is very likely that
		 * an interval would be merged anyway, then
		 * replace it with its containing interval. */
		int remaining = src_count - absorbed;
		bool force_combine = (absorbed > 30000) ||
				     10 * remaining < src_count;

		int64_t intv_end = e.start + e.stride * (int64_t)(e.rep - 1) +
				   e.width;
		if (intv_end >= INT32_MAX) {
			/* overflow protection */
			e.width = INT32_MAX - 1 - e.start;
			e.rep = 1;
		}
		/* Remove internal gaps that are smaller than the margin and
		 * hence would need to be merged away anyway. */
		if (e.width > e.stride - merge_margin || force_combine) {
			e.width = e.stride * (e.rep - 1) + e.width;
			e.rep = 1;
		}

		if (buf_ensure_size(base.count + e.rep + 1,
				    sizeof(struct interval), &base.size,
				    (void **)&base.data) == -1) {
			wp_error("Failed to resize a merge buffer, some damage intervals may be lost");
			continue;
		}

		struct interval *vec = &base.data[base.count];
		int iw = unpack_ext_interval(vec, e, alignment_bits);
		src_count += iw;

		substack[substack_size] = (struct merge_stack_elem){
				.offset = base.count, .count = iw};
		substack_size++;

		base.count += iw + 1;

		/* merge down the stack as far as possible */
		substack_size = fix_merge_stack_property(substack_size,
				substack, &base, &temp, merge_margin, false,
				&absorbed);
	}

	/* collapse the stack into a single final interval list */
	fix_merge_stack_property(substack_size, substack, &base, &temp,
			merge_margin, true, &absorbed);
	free(temp.data);

	*dst_list = base.data;
	*dst_count = substack[0].count;
}

/* This value must be larger than 8, or diffs will explode */
#define MERGE_MARGIN 256
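/* Fold `nintervals` new extended intervals into the accumulated damage in
 * `base`, updating the damage statistics and merging with the existing
 * interval list; if the interval counts grow too large to merge safely, fall
 * back to damaging everything. */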
void merge_damage_records(struct damage *base, int nintervals,
		const struct ext_interval *const new_list, int alignment_bits)
{
	for (int i = 0; i < nintervals; i++) {
		base->acc_damage_stat += new_list[i].width * new_list[i].rep;
		base->acc_count++;
	}

	// Fast return if there is nothing to do
	if (base->damage == DAMAGE_EVERYTHING || nintervals <= 0) {
		return;
	}
	if (nintervals >= (1 << 30) || base->ndamage_intvs >= (1 << 30)) {
		/* avoid overflow in the merge routine; at this point it would
		 * also be cheaper to just damage everything */
		damage_everything(base);
		return;
	}

	merge_mergesort(base->ndamage_intvs, base->damage, nintervals, new_list,
			&base->ndamage_intvs, &base->damage, MERGE_MARGIN,
			alignment_bits);
}

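/* Clear all accumulated damage and statistics, freeing the interval list
 * unless it is the special DAMAGE_EVERYTHING marker. */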
void reset_damage(struct damage *base)
{
	if (base->damage != DAMAGE_EVERYTHING) {
		free(base->damage);
	}
	base->damage = NULL;
	base->ndamage_intvs = 0;
	base->acc_damage_stat = 0;
	base->acc_count = 0;
}
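/* Replace any tracked intervals with the DAMAGE_EVERYTHING marker, so the
 * whole buffer is treated as damaged. */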
void damage_everything(struct damage *base)
{
	if (base->damage != DAMAGE_EVERYTHING) {
		free(base->damage);
	}
	base->damage = DAMAGE_EVERYTHING;
	base->ndamage_intvs = 0;
}