1 /*
2  * cache.c - guess the cache size(s)
3  *
4  * usage: cache [-c] [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]
5  *
6  * Copyright (c) 2000 Carl Staelin.
7  * Copyright (c) 1994 Larry McVoy.  Distributed under the FSF GPL with
 * additional restriction that results may be published only if
9  * (1) the benchmark is unmodified, and
10  * (2) the version in the sccsid below is included in the report.
11  * Support for this development by Sun Microsystems is gratefully acknowledged.
12  */
13 char	*id = "$Id$\n";
14 
15 #include "bench.h"
16 
17 
18 struct cache_results {
19 	size_t	len;
20 	size_t	maxlen;
21 	size_t	line;
22 	double	latency;
23 	double	variation;
24 	double	ratio;
25 	double	slope;
26 };
27 
28 int	find_cache(int start, int n, double prev_lat, struct cache_results* p);
29 int	collect_data(size_t start, size_t line, size_t maxlen,
30 		     int repetitions, struct cache_results** pdata);
31 void	search(int left, int right, int repetitions,
32 	       struct mem_state* state, struct cache_results* p);
33 int	collect_sample(int repetitions, struct mem_state* state,
34 			struct cache_results* p);
35 double	measure(size_t size, int repetitions,
36 		double* variation, struct mem_state* state);
37 double	remove_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
38 		       size_t len, int repetitions, struct mem_state* state);
39 int	test_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
40 		   size_t len, double *baseline, double chunk_baseline,
41 		   int repetitions, struct mem_state* state);
42 int	fixup_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
43 		    size_t len, double *baseline, double chunk_baseline,
44 		    int repetitions, struct mem_state* state);
45 void	check_memory(size_t size, struct mem_state* state);
46 void	pagesort(size_t n, size_t* pages, double* latencies);
47 
48 #ifdef ABS
49 #undef ABS
50 #endif
51 #define ABS(a) ((a) < 0 ? -(a) : (a))
52 
53 #define SWAP(a,b) {size_t _tmp = (a); (a) = (b); (b) = _tmp;}
54 
55 #define THRESHOLD 1.5
56 
57 #define	FIVE(m)		m m m m m
58 #define	TEN(m)		FIVE(m) FIVE(m)
59 #define	FIFTY(m)	TEN(m) TEN(m) TEN(m) TEN(m) TEN(m)
60 #define	HUNDRED(m)	FIFTY(m) FIFTY(m)
61 #define DEREF		p = (char**)*p;
62 
63 static char **addr_save = NULL;
64 
65 void
mem_benchmark(iter_t iterations,void * cookie)66 mem_benchmark(iter_t iterations, void *cookie)
67 {
68 	register char **p;
69 	struct mem_state* state = (struct mem_state*)cookie;
70 
71 	p = addr_save ? addr_save : (char**)state->p[0];
72 	while (iterations-- > 0) {
73 		HUNDRED(DEREF);
74 	}
75 	addr_save = p;
76 }
77 
78 
79 /*
80  * Assumptions:
81  *
82  * 1) Cache lines are a multiple of pointer-size words
83  * 2) Cache lines are no larger than 1/8 of a page (typically 512 bytes)
84  * 3) Pages are an even multiple of cache lines
85  */
86 int
main(int ac,char ** av)87 main(int ac, char **av)
88 {
89 	int	c;
90 	int	i, j, n, start, level, prev, min;
91 	int	warmup = 0;
92 	int	repetitions = (1000000 <= get_enough(0) ? 1 : TRIES);
93 	ssize_t	line = 0;
94 	size_t	maxlen = 32 * 1024 * 1024;
95 	int	*levels;
96 	double	par, maxpar, prev_lat;
97 	char   *usage = "[-c] [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]\n";
98 	struct cache_results* r;
99 	struct mem_state state;
100 
101 	while (( c = getopt(ac, av, "L:M:W:N:")) != EOF) {
102 		switch(c) {
103 		case 'L':
104 			line = atoi(optarg);
105 			if (line < sizeof(char*))
106 				line = sizeof(char*);
107 			break;
108 		case 'M':
109 			maxlen = bytes(optarg);
110 			break;
111 		case 'W':
112 			warmup = atoi(optarg);
113 			break;
114 		case 'N':
115 			repetitions = atoi(optarg);
116 			break;
117 		default:
118 			lmbench_usage(ac, av, usage);
119 			break;
120 		}
121 	}
122 
123 	sched_pin(0);
124 
125 	state.width = 1;
126 	state.len = maxlen;
127 	state.maxlen = maxlen;
128 	state.pagesize = getpagesize();
129 
130 	if (line == 0) {
131 		line = line_find(maxlen, warmup, repetitions, &state);
132 		if (line == 0)
133 			line = getpagesize() / 16;
134 		state.line = line;
135 	}
136 
137 	n = collect_data((size_t)512, line, maxlen, repetitions, &r);
138 	r[n-1].line = line;
139 	levels = (int*)malloc(n * sizeof(int));
140 	if (!levels) {
141 		perror("malloc");
142 		exit(1);
143 	}
144 	bzero(levels, n * sizeof(int));
145 
146 	for (start = 0, prev = 0, level = 0, prev_lat = -1.0;
147 	     (i = find_cache(start, n, prev_lat, r)) >= 0;
148 	     ++level, start = i + 1, prev = i)
149 	{
150 		/*
151 		 * performance is not greatly improved over main memory,
152 		 * so it is likely not a cache boundary
153 		 */
154 		if (r[i].latency / r[n-1].latency > 0.5) break;
155 
156 		/*
157 		 * is cache boundary "legal"? (e.g. 2^N or 1.5*2^N)
158 		 * cache sizes are "never" 1.25*2^N or 1.75*2^N
159 		 */
160 		for (c = r[i].len; c > 0x7; c >>= 1)
161 			;
162 		if (c == 5 || c == 7) {
163 			i++;
164 			if (i >= n) break;
165 		}
166 
167 		levels[level] = i;
168 		prev_lat = (r[start].latency > 0.0 ? r[start].latency : r[start - 1].latency);
169 	}
170 
171 	for (i = 0; i < level; ++i) {
172 		prev = (i > 0 ? levels[i-1]: -1);
173 
174 		/* locate most likely cache latency */
175 		for (j = min = prev + 1; j < levels[i]; ++j) {
176 			if (r[j].latency <= 0.) continue;
177 			if (r[min].latency <= 0.
178 			    || ABS(r[j].slope) < ABS(r[min].slope)) {
179 				min = j;
180 			}
181 		}
182 
183 		/* Compute line size */
184 		if (i == level - 1) {
185 			line = r[n-1].line;
186 		} else {
187 			j = (levels[i] + levels[i+1]) / 2;
188 			for (line = -1; line <= 0 && j < n; ++j) {
189 				r[j].line = line_find(r[j].len, warmup,
190 						      repetitions, &state);
191 				line = r[j].line;
192 			}
193 		}
194 
195 		/* Compute memory parallelism for cache */
196 		maxpar = par_mem(r[levels[i]-1].len, warmup,
197 				 repetitions, &state);
198 
199 		fprintf(stderr,
200 		    "L%d cache: %lu bytes %.2f nanoseconds %ld linesize %.2f parallelism\n",
201 		    (int)(i+1), (unsigned long)r[levels[i]].len,
202 		    r[min].latency, (long)line, maxpar);
203 	}
204 
205 	/* Compute memory parallelism for main memory */
206 	j = n - 1;
207 	for (i = n - 1; i >= 0; i--) {
208 		if (r[i].latency < 0.) continue;
209 		if (r[i].latency > 0.99 * r[n-1].latency)
210 			j = i;
211 	}
212 	par = par_mem(r[j].len, warmup, repetitions, &state);
213 
214 	fprintf(stderr, "Memory latency: %.2f nanoseconds %.2f parallelism\n",
215 		r[n-1].latency, par);
216 
217 	exit(0);
218 }
219 
220 int
find_cache(int start,int n,double prev_lat,struct cache_results * p)221 find_cache(int start, int n, double prev_lat, struct cache_results* p)
222 {
223 	int	i, j, prev;
224 	double	max = -1.;
225 
226 	for (prev = (start == 0 ? start : start - 1); prev > 0; prev--) {
227 		if (p[prev].ratio > 0.0) break;
228 	}
229 
230 	for (i = start, j = -1; i < n; ++i) {
231 		if (p[i].latency < 0.)
232 			continue;
233 		if (max < p[i].ratio)
234 			max = p[i].ratio;
235 		if (THRESHOLD < p[i].ratio)
236 			j = i;
237 		if (THRESHOLD < max && p[j].len * 2 <= p[i].len)
238 			return j;
239 		prev = i;
240 	}
241 	return -1;
242 }
243 
/*
 * Build the sample table and measure enough points to characterize
 * the latency curve.
 *
 * Sample sizes run from "start" to "maxlen" with four geometrically
 * increasing steps per power of two.  The scratch memory is allocated
 * once (halving maxlen until the allocation succeeds), the smallest
 * and largest sizes are measured directly, and search() adaptively
 * fills in the interior.  Returns the sample count; *pdata is
 * malloc'ed here and owned by the caller.
 */
int
collect_data(size_t start, size_t line, size_t maxlen,
	     int repetitions, struct cache_results** pdata)
{
	int	i;
	int	samples;
	int	idx;
	size_t	len = start;
	size_t	incr = start / 4;
	struct mem_state state;
	struct cache_results* p;


	state.width = 1;
	state.len = maxlen;
	state.maxlen = maxlen;
	state.line = line;
	state.pagesize = getpagesize();
	state.addr = NULL;

	/* count the (maximum) number of samples to take */
	for (len = start, incr = start / 4, samples = 0; len <= maxlen; incr<<=1) {
		for (i = 0; i < 4 && len <= maxlen; ++i, len += incr)
			samples++;
	}
	*pdata = (struct cache_results*)
		malloc(samples * sizeof(struct cache_results));
	if (!*pdata) {
		perror("malloc");
		exit(2);
	}
	p = *pdata;

	/* initialize the data; latency == -1 marks "not yet measured" */
	for (len = start, incr = start / 4, idx = 0; len <= maxlen; incr<<=1) {
		for (i = 0; i < 4 && len <= maxlen; ++i, ++idx, len += incr) {
			p[idx].len = len;
			p[idx].line = line;
			p[idx].latency = -1.;
			p[idx].ratio = -1.;
			p[idx].slope = -1.;
		}
	}

	/* make sure we have enough memory for the scratch data;
	 * on failure halve maxlen and drop the now-oversized samples */
	while (state.addr == NULL) {
		mem_initialize(0, &state);
		if (state.addr == NULL) {
			maxlen /= 2;
			state.len = state.maxlen = maxlen;
			while (p[samples-1].len > maxlen)
				samples--;
		}
	}
	for (i = 0; i < samples; ++i)
		p[i].maxlen = maxlen;
	/* in case the system has laid out the pages well, don't scramble */
	for (i = 0; i < state.npages; ++i)
		state.pages[i] = i * state.pagesize;

	/* anchor the curve: measure the largest size first, dropping
	 * trailing sizes that fail to yield a positive latency */
	p[samples-1].latency = measure(p[samples-1].len, repetitions,
				       &p[samples-1].variation, &state);
	while (p[samples-1].latency <= 0.0) {
		p[samples-1].latency = measure(p[samples-1].len,
					       repetitions,
					       &p[samples-1].variation,
					       &state);
		--samples;
	}
	p[0].latency = measure(p[0].len, repetitions, &p[0].variation, &state);
	/* adaptively measure interior points where the latency changes */
	search(0, samples - 1, repetitions, &state, p);

	/*
	fprintf(stderr, "%10.10s %8.8s %8.8s %8.8s %8.8s %5.5s\n",
		"mem size", "latency", "variation", "ratio", "slope", "line");
	for (idx = 0; idx < samples; ++idx) {
		if (p[idx].latency < 0.) continue;
		fprintf(stderr,
			"%10.6f %8.3f %8.3f %8.3f %8.3f %4lu\n",
			p[idx].len / (1000. * 1000.),
			p[idx].latency,
			p[idx].variation,
			p[idx].ratio,
			p[idx].slope,
			(unsigned long)p[idx].line);
	}
	/**/
	mem_cleanup(0, &state);

	return samples;
}
335 
/*
 * Recursive bisection over the sample interval [left, right]; both
 * endpoints have already been measured.  Record p[left]'s latency
 * ratio and slope relative to p[right]; if the interval shows a real
 * latency change (ratio outside [0.97, 1.35]), measure the midpoint
 * and recurse into both halves.  Flat stretches are left unsampled
 * (latency stays -1).
 */
void
search(int left, int right, int repetitions,
       struct mem_state* state, struct cache_results* p)
{
	int	middle = left + (right - left) / 2;

	/*
	fprintf(stderr,
		"search(%d, %d, ...): [%lu/%G, %lu, %lu/%G] entering\n",
		left, right,
		(unsigned long)p[left].len, p[left].latency,
		(unsigned long)p[middle].len,
		(unsigned long)p[right].len, p[right].latency);
	/**/

	if (p[left].latency > 0.0) {
		p[left].ratio = p[right].latency / p[left].latency;
		p[left].slope = (p[left].ratio - 1.) / (double)(right - left);
		/* we probably have a bad data point, so ignore it */
		if (p[left].ratio < 0.98) {
			p[left].latency = p[right].latency;
			p[left].ratio = 1.;
			p[left].slope = 0.;
		}
	}

	/* interval too small to subdivide */
	if (middle == left || middle == right)
		return;

	if (p[left].ratio > 1.35 || p[left].ratio < 0.97) {
		collect_sample(repetitions, state, &p[middle]);
		search(middle, right, repetitions, state, p);
		search(left, middle, repetitions, state, p);
	}
	return;
}
372 
/*
 * Measure the latency of a p->len working set, trying (up to 8 passes)
 * to swap out pages whose placement causes cache conflicts, so the
 * sample reflects the cache rather than unlucky page mapping.  Stores
 * the final latency in p->latency; returns nonzero iff it is positive.
 */
int
collect_sample(int repetitions, struct mem_state* state,
	       struct cache_results* p)
{
	int	i, modified, npages;
	double	baseline;

	npages = (p->len + getpagesize() - 1) / getpagesize();
        baseline = measure(p->len, repetitions, &p->variation, state);

	/* iterate page replacement until it stops helping (max 8 passes) */
	if (npages > 1) {
		for (i = 0, modified = 1; i < 8 && modified; ++i) {
			modified = test_chunk(0, npages, npages,
					      state->pages, p->len,
					      &baseline, 0.0,
					      repetitions, state);
		}
	}
	p->latency = baseline;

	/*
	fprintf(stderr, "collect_sample: len=%lu, latency=%G\n",
		(unsigned long)p->len, p->latency);
	/**/

	return (p->latency > 0);
}
400 
/*
 * Time a pointer chase over the first "size" bytes of the region in
 * "state" and return the median load latency in nanoseconds.  Sets
 * *variation to median/minimum latency (1.0 == perfectly stable;
 * -1.0 when the minimum round was zero).
 *
 * Assumes mem_initialize() has already built the page/line/word
 * tables; this routine re-threads the pointer chain to cover exactly
 * the first npages entries of state->pages, with the last page
 * possibly only partially covered.
 */
double
measure(size_t size, int repetitions,
	double* variation, struct mem_state* state)
{
	size_t	i, j, npages, nlines;
	double	time, median;
	char	*p;
	result_t *r, *r_save;
	size_t	*pages;

	pages = state->pages;
	npages = (size + getpagesize() - 1) / getpagesize();
	nlines = state->nlines;

	/* partial last page: only chase the lines that fit in it.
	 * NOTE(review): if size % pagesize < state->line this makes
	 * nlines 0 and lines[nlines - 1] below indexes out of bounds;
	 * sample sizes start at 512 bytes -- confirm it cannot occur. */
	if (size % getpagesize())
		nlines = (size % getpagesize()) / state->line;

	r_save = get_results();
	r = (result_t*)malloc(sizeof_result(repetitions));
	if (!r) {
		perror("malloc");
		exit(3);
	}
	insertinit(r);

	/*
	 * assumes that you have used mem_initialize() to setup the memory
	 */
	p = state->base;
	/* link the last line of each page to the first line of the next */
	for (i = 0; i < npages - 1; ++i) {
		for (j = 0; j < state->nwords; ++j) {
			*(char**)(p + pages[i] + state->lines[state->nlines - 1] + state->words[j]) =
			p + pages[i+1] + state->lines[0] + state->words[j];
		}
	}
	/* close the loop: the last page links back to the first,
	 * rotating to the next word so every word gets visited */
	for (j = 0; j < state->nwords; ++j) {
		*(char**)(p + pages[npages - 1] + state->lines[nlines - 1] + state->words[j]) =
			p + pages[0] + state->lines[0] + state->words[(j+1)%state->nwords];
	}

	/*
	check_memory(size, state);
	/**/

	addr_save = NULL;
	state->p[0] = p + pages[0] + state->lines[0] + state->words[0];
	/* now, run through the chain once to clear the cache */
	mem_benchmark((size / sizeof(char*) + 100) / 100, state);

	/* take "repetitions" timed runs, kept sorted by insertsort */
	for (i = 0; i < repetitions; ++i) {
		BENCH1(mem_benchmark(__n, state); __n = 1;, 0)
		insertsort(gettime(), get_n(), r);
	}
	set_results(r);
	/* gettime()/get_n() now reflect the median run; each benchmark
	 * iteration performed 100 loads, hence the factor of 100 */
	median = (1000. * (double)gettime()) / (100. * (double)get_n());

	save_minimum();
	time = (1000. * (double)gettime()) / (100. * (double)get_n());

	/* Are the results stable, or do they vary? */
	if (time != 0.)
		*variation = median / time;
	else
		*variation = -1.0;
	set_results(r_save);
	free(r);

	/* undo the partial-page shortening of the chain */
	if (nlines < state->nlines) {
		for (j = 0; j < state->nwords; ++j) {
			*(char**)(p + pages[npages - 1] + state->lines[nlines - 1] + state->words[j]) =
				p + pages[npages - 1] + state->lines[nlines] + state->words[j];
		}
	}
	/*
	fprintf(stderr, "%.6f %.2f\n", size / (1000. * 1000.), median);
	/**/

	return median;
}
480 
481 
/*
 * Measure the latency of the working set with the pages in
 * [i, i+chunk) excluded: the chunk is swapped to the end of the page
 * list, the shortened region is timed, and the original ordering is
 * restored before returning.  Returns the measured latency.
 */
double
remove_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
	     size_t len, int repetitions, struct mem_state* state)
{
	double	latency, variation;
	size_t	k, tmp;
	int	displaced = (i + chunk < npages);

	/* move the candidate chunk out of the measured region */
	if (displaced) {
		for (k = 0; k < chunk; ++k) {
			tmp = pages[i+k];
			pages[i+k] = pages[npages-1-k];
			pages[npages-1-k] = tmp;
		}
	}

	latency = measure(len - chunk * getpagesize(),
			  repetitions, &variation, state);

	/* put the chunk back where it came from */
	if (displaced) {
		for (k = 0; k < chunk; ++k) {
			tmp = pages[i+k];
			pages[i+k] = pages[npages-1-k];
			pages[npages-1-k] = tmp;
		}
	}

	return latency;
}
507 
/*
 * Decide whether the pages in [i, i+chunk) contain cache-conflicting
 * pages and, if so, replace them with better ones.
 *
 * Chunks of at most 20 pages (that are not the whole page set) go
 * straight to fixup_chunk().  Larger chunks are split into ~20
 * subchunks; any subchunk whose removal measurably lowers latency
 * (confirmed by a second measurement) is recursed into.  Returns
 * nonzero if any pages were replaced; *baseline is updated by
 * fixup_chunk() when it finds an improvement.
 */
int
test_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
	   size_t len, double *baseline, double chunk_baseline,
	   int repetitions, struct mem_state* state)
{
	int	modified = 0;
	int	changed;
	size_t	j, k, subchunk;
	double	t, tt, nodiff_chunk_baseline;

	if (chunk <= 20 && chunk < npages) {
		return fixup_chunk(i, chunk, npages, pages, len, baseline,
				   chunk_baseline, repetitions, state);
	}

	nodiff_chunk_baseline = *baseline;
	subchunk = (chunk + 19) / 20;
	for (j = i, k = 0; j < i + chunk; j+=subchunk, k++) {
		/* the final subchunk may be short */
		if (j + subchunk > i + chunk) subchunk = i + chunk - j;

		t = remove_chunk(j, subchunk, npages, pages,
				 len, repetitions, state);

		/*
		fprintf(stderr, "test_chunk(...): baseline=%G, t=%G, len=%d, chunk=%d, i=%d\n", *baseline, t, len, subchunk, j);
		/**/

		/* removal didn't help noticeably => subchunk is fine */
		if (t >= 0.99 * *baseline) continue;
		if (t >= 0.999 * nodiff_chunk_baseline) continue;

		/* re-measure to guard against a one-off fluke */
		tt = remove_chunk(j, subchunk, npages, pages,
				  len, repetitions, state);

		if (tt > t) t = tt;

		if (t >= 0.99 * *baseline) continue;
		if (t >= 0.999 * nodiff_chunk_baseline) continue;

		changed = test_chunk(j, subchunk, npages, pages, len,
				     baseline, t, repetitions, state);

		if (changed) {
			modified = 1;
		} else {
			/* removal helped but no swap did; compare later
			 * subchunks against this latency instead */
			nodiff_chunk_baseline = t;
		}
	}
	return modified;
}
557 
558 /*
559  * This routine is called once we have identified a chunk
560  * that has pages that are suspected of colliding with other
561  * pages.
562  *
563  * The algorithm is to remove all the pages, and then
564  * slowly add back pages; attempting to add pages with
565  * minimal cost.
566  */
567 int
fixup_chunk(size_t i,size_t chunk,size_t npages,size_t * pages,size_t len,double * baseline,double chunk_baseline,int repetitions,struct mem_state * state)568 fixup_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
569 	    size_t len, double *baseline, double chunk_baseline,
570 	    int repetitions, struct mem_state* state)
571 {
572 	int	swapped = 0;
573 	size_t	j, k;
574 	size_t	page, substitute, original;
575 	size_t	ntotalpages, nsparepages;
576 	size_t	subset_len;
577 	size_t	*pageset;
578 	size_t	*saved_pages;
579 	static size_t	available_index = 0;
580 	double	t, var, new_baseline;
581 	double	latencies[20];
582 
583 	ntotalpages = (state->maxlen + getpagesize() - 1)/ getpagesize();
584 	nsparepages = ntotalpages - npages;
585 	pageset = state->pages + npages;
586 	new_baseline = *baseline;
587 
588 	saved_pages = (size_t*)malloc(sizeof(size_t) * ntotalpages);
589 	if (!saved_pages) {
590 		perror("malloc");
591 		exit(4);
592 	}
593 	bcopy(pages, saved_pages, sizeof(int) * ntotalpages);
594 
595 	/* move everything to the end of the page list */
596 	if (i + chunk < npages) {
597 		for (j = 0; j < chunk; ++j) {
598 			page = pages[i+j];
599 			pages[i+j] = pages[npages-chunk+j];
600 			pages[npages-chunk+j] = page;
601 		}
602 	}
603 
604 	if (available_index >= nsparepages) available_index = 0;
605 
606 	/*
607 	 * first try to identify which pages we can definitely keep
608 	 */
609 	for (j = 0, k = chunk; j < k; ) {
610 
611 		t = measure((npages - chunk + j + 1) * getpagesize(),
612 			    repetitions, &var, state);
613 
614 		if (0.995 * t <= chunk_baseline) {
615 			latencies[j] = t;
616 			++j;	/* keep this page */
617 		} else {
618 			--k;	/* this page is probably no good */
619 			latencies[k] = t;
620 			SWAP(pages[npages - chunk + j], pages[npages - chunk + k]);
621 		}
622 	}
623 	/*
624 	 * sort the "bad" pages by increasing latency
625 	 */
626 	pagesort(chunk - j, &pages[npages - chunk + j], &latencies[j]);
627 
628 	/*
629 	fprintf(stderr, "fixup_chunk: len=%d, chunk=%d, j=%d, baseline=%G, lat[%d]=%G..%G\n", len, chunk, j, *baseline, j, (j < chunk ? latencies[j] : -1.0), latencies[chunk - 1]);
630 	/**/
631 
632 	if (chunk >= npages && j < chunk / 2) {
633 		j = chunk / 2;
634 		t = measure((npages - chunk + j + 1) * getpagesize(),
635 			    repetitions, &var, state);
636 		chunk_baseline = t;
637 	}
638 
639 	for (k = 0; j < chunk && k < 2 * npages; ++k) {
640 		original = npages - chunk + j;
641 		substitute = nsparepages - 1;
642 		substitute -= (k + available_index) % (nsparepages - 1);
643 		subset_len = (original + 1) * getpagesize();
644 		if (j == chunk - 1 && len % getpagesize()) {
645 			subset_len = len;
646 		}
647 
648 		SWAP(pages[original], pageset[substitute]);
649 		t = measure(subset_len, repetitions, &var, state);
650 		SWAP(pages[original], pageset[substitute]);
651 
652 		/*
653 		 * try to keep pages ordered by increasing latency
654 		 */
655 		if (t < latencies[chunk - 1]) {
656 			latencies[chunk - 1] = t;
657 			SWAP(pages[npages - 1], pageset[substitute]);
658 			pagesort(chunk - j,
659 				 &pages[npages - chunk + j], &latencies[j]);
660 		}
661 		if (0.995 * latencies[j] <= chunk_baseline) {
662 			++j;	/* keep this page */
663 			++swapped;
664 		}
665 	}
666 
667 	available_index = (k + available_index) % (nsparepages - 1);
668 
669 	/* measure new baseline, in case we didn't manage to optimally
670 	 * replace every page
671 	 */
672 	if (swapped) {
673 		new_baseline = measure(len, repetitions, &var, state);
674 
675 		/*
676 		fprintf(stderr, "fixup_chunk: len=%d, swapped=%d, k=%d, baseline=%G, newbase=%G\n", len, swapped, k, *baseline, new_baseline);
677 		/**/
678 
679 		if (new_baseline >= 0.999 * *baseline) {
680 			/* no benefit to these changes; back them out */
681 			swapped = 0;
682 			bcopy(saved_pages, pages, sizeof(int) * ntotalpages);
683 		} else {
684 			/* we sped up, so keep these changes */
685 			*baseline = new_baseline;
686 
687 			/* move back to the middle of the pagelist */
688 			if (i + chunk < npages) {
689 				for (j = 0; j < chunk; ++j) {
690 					page = pages[i+j];
691 					pages[i+j] = pages[npages-chunk+j];
692 					pages[npages-chunk+j] = page;
693 				}
694 			}
695 		}
696 	/*
697 	} else {
698 		fprintf(stderr, "fixup_chunk: len=%d, swapped=%d, k=%d\n", len, swapped, k);
699 	/**/
700 	}
701 	free(saved_pages);
702 
703 	return swapped;
704 }
705 
/*
 * Debugging helper (invoked only from commented-out code in measure()):
 * walk the pointer chain covering "size" bytes and sanity-check it --
 * every link must land on a known page, a half-speed shadow pointer
 * must not collide with the main pointer (no short cycle), and the
 * number of links must equal the number of words in the region.
 */
void
check_memory(size_t size, struct mem_state* state)
{
	size_t	i, j, first_page, npages, nwords;
	size_t	page, word_count, pagesize;
	off_t	offset;
	char	**p, **q;
	char	**start;

	pagesize = getpagesize();
	npages = (size + pagesize - 1) / pagesize;
	nwords = size / sizeof(char*);

	/*
	fprintf(stderr, "check_memory(%d, ...): entering, %d words\n", size, nwords);
	/**/
	word_count = 1;
	first_page = 0;
	start = (char**)(state->base + state->pages[0] + state->lines[0] + state->words[0]);
	for (q = p = (char**)*start; p != start; ) {
		word_count++;
		/* find which known page this link points into; start the
		 * scan at the last page matched to stay roughly in order */
		offset = (unsigned long)p - (unsigned long)state->base;
		page = offset - offset % pagesize;
		for (j = first_page; j < npages; ++j) {
			if (page == state->pages[j]) break;
		}
		if (j == npages) {
			/* wrap around and check the earlier pages too */
			for (j = 0; j < first_page; ++j) {
				if (page == state->pages[j]) break;
			}
			if (j == first_page) {
				fprintf(stderr,
					"check_memory: bad memory reference for size %lu\n",
					(unsigned long)size);
			}
		}
		first_page = j % npages;
		p = (char**)*p;
		/* q advances at half speed (tortoise/hare style).
		 * NOTE(review): the collision test compares the links'
		 * targets (*p == *q) rather than p == q -- presumably
		 * intentional, but confirm. */
		if (word_count & 0x1) q = (char**)*q;
		if (*p == *q) {
			fprintf(stderr, "check_memory: unwanted memory cycle! page=%lu\n", (unsigned long)j);
			return;
		}
	}
	if (word_count != nwords) {
		fprintf(stderr, "check_memory: wrong word count, expected %lu, got %lu\n", (unsigned long)nwords, (unsigned long)word_count);
	}
	/*
	fprintf(stderr, "check_memory(%lu, ...): exiting\n", (unsigned long)size);
	/**/
}
757 
/*
 * Sort the parallel arrays pages[] / latencies[] (n entries each) by
 * increasing latency, keeping each page paired with its latency.
 * Simple exchange sort; n is always small (<= 20 in this file).
 *
 * The original used int indices against a size_t bound, so n == 0
 * made "n - 1" wrap to SIZE_MAX and the loop read out of bounds;
 * callers do pass n == 0 (chunk - j in fixup_chunk).  Fixed by using
 * size_t indices with an overflow-safe bound.
 */
void
pagesort(size_t n, size_t* pages, double* latencies)
{
	size_t	i, j;
	size_t	pg;
	double	t;

	for (i = 0; i + 1 < n; ++i) {	/* i + 1 < n is safe for n == 0 */
		for (j = i + 1; j < n; ++j) {
			if (latencies[i] > latencies[j]) {
				t = latencies[i];
				latencies[i] = latencies[j];
				latencies[j] = t;
				pg = pages[i];
				pages[i] = pages[j];
				pages[j] = pg;
			}
		}
	}
}
775