1 /*
2 * cache.c - guess the cache size(s)
3 *
4 * usage: cache [-c] [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]
5 *
6 * Copyright (c) 2000 Carl Staelin.
7 * Copyright (c) 1994 Larry McVoy. Distributed under the FSF GPL with
8 * additional restriction that results may published only if
9 * (1) the benchmark is unmodified, and
10 * (2) the version in the sccsid below is included in the report.
11 * Support for this development by Sun Microsystems is gratefully acknowledged.
12 */
13 char *id = "$Id$\n";
14
15 #include "bench.h"
16
17
/*
 * One sample point on the latency-vs-working-set-size curve.
 */
struct cache_results {
	size_t len;		/* working-set size sampled (bytes) */
	size_t maxlen;		/* largest size actually allocatable (bytes) */
	size_t line;		/* cache line size associated with this sample */
	double latency;		/* measured load latency (ns); < 0 == not sampled */
	double variation;	/* median/minimum time ratio from measure() */
	double ratio;		/* latency(largest sample) / latency(this sample) */
	double slope;		/* (ratio - 1) per sample-index step, see search() */
};
27
28 int find_cache(int start, int n, double prev_lat, struct cache_results* p);
29 int collect_data(size_t start, size_t line, size_t maxlen,
30 int repetitions, struct cache_results** pdata);
31 void search(int left, int right, int repetitions,
32 struct mem_state* state, struct cache_results* p);
33 int collect_sample(int repetitions, struct mem_state* state,
34 struct cache_results* p);
35 double measure(size_t size, int repetitions,
36 double* variation, struct mem_state* state);
37 double remove_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
38 size_t len, int repetitions, struct mem_state* state);
39 int test_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
40 size_t len, double *baseline, double chunk_baseline,
41 int repetitions, struct mem_state* state);
42 int fixup_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
43 size_t len, double *baseline, double chunk_baseline,
44 int repetitions, struct mem_state* state);
45 void check_memory(size_t size, struct mem_state* state);
46 void pagesort(size_t n, size_t* pages, double* latencies);
47
48 #ifdef ABS
49 #undef ABS
50 #endif
51 #define ABS(a) ((a) < 0 ? -(a) : (a))
52
53 #define SWAP(a,b) {size_t _tmp = (a); (a) = (b); (b) = _tmp;}
54
55 #define THRESHOLD 1.5
56
57 #define FIVE(m) m m m m m
58 #define TEN(m) FIVE(m) FIVE(m)
59 #define FIFTY(m) TEN(m) TEN(m) TEN(m) TEN(m) TEN(m)
60 #define HUNDRED(m) FIFTY(m) FIFTY(m)
61 #define DEREF p = (char**)*p;
62
63 static char **addr_save = NULL;
64
/*
 * Benchmark body: chase the pointer chain built by measure(), doing
 * 100 dependent loads per iteration (unrolled via HUNDRED(DEREF)).
 *
 * The final pointer is stashed in the global addr_save, which both
 * prevents the compiler from discarding the loads and lets consecutive
 * calls resume the chain where the previous call stopped.
 */
void
mem_benchmark(iter_t iterations, void *cookie)
{
	register char **p;
	struct mem_state* state = (struct mem_state*)cookie;

	/* resume from the previous run's position, if any */
	p = addr_save ? addr_save : (char**)state->p[0];
	while (iterations-- > 0) {
		HUNDRED(DEREF);	/* 100 dependent loads: p = (char**)*p */
	}
	addr_save = p;
}
77
78
79 /*
80 * Assumptions:
81 *
82 * 1) Cache lines are a multiple of pointer-size words
83 * 2) Cache lines are no larger than 1/8 of a page (typically 512 bytes)
84 * 3) Pages are an even multiple of cache lines
85 */
/*
 * Probe the memory hierarchy: collect a load-latency curve over
 * increasing working-set sizes, find the latency jumps that mark cache
 * boundaries, and report size, latency, line size, and memory
 * parallelism for each cache level, plus main-memory latency and
 * parallelism.  Results go to stderr.
 */
int
main(int ac, char **av)
{
	int c;
	int i, j, n, start, level, prev, min;
	int warmup = 0;
	/* take multiple timing runs only when the clock is coarse */
	int repetitions = (1000000 <= get_enough(0) ? 1 : TRIES);
	ssize_t line = 0;
	size_t maxlen = 32 * 1024 * 1024;
	int *levels;
	double par, maxpar, prev_lat;
	/* NOTE(review): usage lists -c but the getopt() string below
	 * does not accept it -- confirm intended */
	char *usage = "[-c] [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]\n";
	struct cache_results* r;
	struct mem_state state;

	while (( c = getopt(ac, av, "L:M:W:N:")) != EOF) {
		switch(c) {
		case 'L':	/* cache line size (bytes), floor of one pointer */
			line = atoi(optarg);
			if (line < sizeof(char*))
				line = sizeof(char*);
			break;
		case 'M':	/* largest memory size to probe */
			maxlen = bytes(optarg);
			break;
		case 'W':	/* warmup passed through to the probes */
			warmup = atoi(optarg);
			break;
		case 'N':	/* number of timing repetitions */
			repetitions = atoi(optarg);
			break;
		default:
			lmbench_usage(ac, av, usage);
			break;
		}
	}

	sched_pin(0);	/* pin to one CPU so the caches stay local */

	state.width = 1;
	state.len = maxlen;
	state.maxlen = maxlen;
	state.pagesize = getpagesize();

	/* no line size given: try to discover it, else fall back to a guess */
	if (line == 0) {
		line = line_find(maxlen, warmup, repetitions, &state);
		if (line == 0)
			line = getpagesize() / 16;
		state.line = line;
	}

	/* sample the latency curve from 512 bytes up to maxlen */
	n = collect_data((size_t)512, line, maxlen, repetitions, &r);
	r[n-1].line = line;
	levels = (int*)malloc(n * sizeof(int));
	if (!levels) {
		perror("malloc");
		exit(1);
	}
	bzero(levels, n * sizeof(int));

	/* walk the curve, recording one sample index per cache level */
	for (start = 0, prev = 0, level = 0, prev_lat = -1.0;
	     (i = find_cache(start, n, prev_lat, r)) >= 0;
	     ++level, start = i + 1, prev = i)
	{
		/*
		 * performance is not greatly improved over main memory,
		 * so it is likely not a cache boundary
		 */
		if (r[i].latency / r[n-1].latency > 0.5) break;

		/*
		 * is cache boundary "legal"? (e.g. 2^N or 1.5*2^N)
		 * cache sizes are "never" 1.25*2^N or 1.75*2^N
		 */
		for (c = r[i].len; c > 0x7; c >>= 1)
			;
		if (c == 5 || c == 7) {
			i++;
			if (i >= n) break;
		}

		levels[level] = i;
		/* NOTE(review): reads r[start - 1] when start == 0 and
		 * r[0].latency <= 0 -- confirm r[0] is always measured */
		prev_lat = (r[start].latency > 0.0 ? r[start].latency : r[start - 1].latency);
	}

	/* report each detected level */
	for (i = 0; i < level; ++i) {
		prev = (i > 0 ? levels[i-1]: -1);

		/* locate most likely cache latency */
		for (j = min = prev + 1; j < levels[i]; ++j) {
			if (r[j].latency <= 0.) continue;
			if (r[min].latency <= 0.
			    || ABS(r[j].slope) < ABS(r[min].slope)) {
				min = j;
			}
		}

		/* Compute line size */
		if (i == level - 1) {
			line = r[n-1].line;
		} else {
			/* try sizes between this level and the next until
			 * line_find() succeeds */
			j = (levels[i] + levels[i+1]) / 2;
			for (line = -1; line <= 0 && j < n; ++j) {
				r[j].line = line_find(r[j].len, warmup,
						repetitions, &state);
				line = r[j].line;
			}
		}

		/* Compute memory parallelism for cache */
		maxpar = par_mem(r[levels[i]-1].len, warmup,
				repetitions, &state);

		fprintf(stderr,
		    "L%d cache: %lu bytes %.2f nanoseconds %ld linesize %.2f parallelism\n",
		    (int)(i+1), (unsigned long)r[levels[i]].len,
		    r[min].latency, (long)line, maxpar);
	}

	/* Compute memory parallelism for main memory */
	j = n - 1;
	/* pick the smallest size whose latency is within 1% of the largest */
	for (i = n - 1; i >= 0; i--) {
		if (r[i].latency < 0.) continue;
		if (r[i].latency > 0.99 * r[n-1].latency)
			j = i;
	}
	par = par_mem(r[j].len, warmup, repetitions, &state);

	fprintf(stderr, "Memory latency: %.2f nanoseconds %.2f parallelism\n",
		r[n-1].latency, par);

	exit(0);
}
219
220 int
find_cache(int start,int n,double prev_lat,struct cache_results * p)221 find_cache(int start, int n, double prev_lat, struct cache_results* p)
222 {
223 int i, j, prev;
224 double max = -1.;
225
226 for (prev = (start == 0 ? start : start - 1); prev > 0; prev--) {
227 if (p[prev].ratio > 0.0) break;
228 }
229
230 for (i = start, j = -1; i < n; ++i) {
231 if (p[i].latency < 0.)
232 continue;
233 if (max < p[i].ratio)
234 max = p[i].ratio;
235 if (THRESHOLD < p[i].ratio)
236 j = i;
237 if (THRESHOLD < max && p[j].len * 2 <= p[i].len)
238 return j;
239 prev = i;
240 }
241 return -1;
242 }
243
/*
 * Build the latency-vs-size sample table from <start> bytes up to
 * <maxlen> bytes (four geometrically spaced samples per power of two),
 * measure the two endpoints, and let search() adaptively fill in the
 * interior.  *pdata receives a malloc'ed array of cache_results; the
 * return value is the number of samples.  maxlen may be silently
 * reduced if the full scratch buffer cannot be allocated.
 */
int
collect_data(size_t start, size_t line, size_t maxlen,
	int repetitions, struct cache_results** pdata)
{
	int i;
	int samples;
	int idx;
	size_t len = start;
	size_t incr = start / 4;
	struct mem_state state;
	struct cache_results* p;


	state.width = 1;
	state.len = maxlen;
	state.maxlen = maxlen;
	state.line = line;
	state.pagesize = getpagesize();
	state.addr = NULL;

	/* count the (maximum) number of samples to take */
	for (len = start, incr = start / 4, samples = 0; len <= maxlen; incr<<=1) {
		for (i = 0; i < 4 && len <= maxlen; ++i, len += incr)
			samples++;
	}
	*pdata = (struct cache_results*)
		malloc(samples * sizeof(struct cache_results));
	if (!*pdata) {
		perror("malloc");
		exit(2);
	}
	p = *pdata;

	/* initialize the data; latency < 0 marks "not yet sampled" */
	for (len = start, incr = start / 4, idx = 0; len <= maxlen; incr<<=1) {
		for (i = 0; i < 4 && len <= maxlen; ++i, ++idx, len += incr) {
			p[idx].len = len;
			p[idx].line = line;
			p[idx].latency = -1.;
			p[idx].ratio = -1.;
			p[idx].slope = -1.;
		}
	}

	/* make sure we have enough memory for the scratch data;
	 * halve maxlen (dropping samples) until allocation succeeds */
	while (state.addr == NULL) {
		mem_initialize(0, &state);
		if (state.addr == NULL) {
			maxlen /= 2;
			state.len = state.maxlen = maxlen;
			while (p[samples-1].len > maxlen)
				samples--;
		}
	}
	for (i = 0; i < samples; ++i)
		p[i].maxlen = maxlen;
	/* in case the system has laid out the pages well, don't scramble */
	for (i = 0; i < state.npages; ++i)
		state.pages[i] = i * state.pagesize;

	/* anchor the curve: measure the largest size first ... */
	p[samples-1].latency = measure(p[samples-1].len, repetitions,
				&p[samples-1].variation, &state);
	/* NOTE(review): this retry re-measures the same entry, then
	 * drops it; the loop condition then inspects the next smaller
	 * (still unmeasured, latency == -1) entry and keeps walking
	 * down -- confirm this cascade is the intended behavior */
	while (p[samples-1].latency <= 0.0) {
		p[samples-1].latency = measure(p[samples-1].len,
					repetitions,
					&p[samples-1].variation,
					&state);
		--samples;
	}
	/* ... then the smallest, and adaptively fill in between */
	p[0].latency = measure(p[0].len, repetitions, &p[0].variation, &state);
	search(0, samples - 1, repetitions, &state, p);

	/*
	fprintf(stderr, "%10.10s %8.8s %8.8s %8.8s %8.8s %5.5s\n",
		"mem size", "latency", "variation", "ratio", "slope", "line");
	for (idx = 0; idx < samples; ++idx) {
		if (p[idx].latency < 0.) continue;
		fprintf(stderr,
			"%10.6f %8.3f %8.3f %8.3f %8.3f %4lu\n",
			p[idx].len / (1000. * 1000.),
			p[idx].latency,
			p[idx].variation,
			p[idx].ratio,
			p[idx].slope,
			(unsigned long)p[idx].line);
	}
	/**/
	mem_cleanup(0, &state);

	return samples;
}
335
336 void
search(int left,int right,int repetitions,struct mem_state * state,struct cache_results * p)337 search(int left, int right, int repetitions,
338 struct mem_state* state, struct cache_results* p)
339 {
340 int middle = left + (right - left) / 2;
341
342 /*
343 fprintf(stderr,
344 "search(%d, %d, ...): [%lu/%G, %lu, %lu/%G] entering\n",
345 left, right,
346 (unsigned long)p[left].len, p[left].latency,
347 (unsigned long)p[middle].len,
348 (unsigned long)p[right].len, p[right].latency);
349 /**/
350
351 if (p[left].latency > 0.0) {
352 p[left].ratio = p[right].latency / p[left].latency;
353 p[left].slope = (p[left].ratio - 1.) / (double)(right - left);
354 /* we probably have a bad data point, so ignore it */
355 if (p[left].ratio < 0.98) {
356 p[left].latency = p[right].latency;
357 p[left].ratio = 1.;
358 p[left].slope = 0.;
359 }
360 }
361
362 if (middle == left || middle == right)
363 return;
364
365 if (p[left].ratio > 1.35 || p[left].ratio < 0.97) {
366 collect_sample(repetitions, state, &p[middle]);
367 search(middle, right, repetitions, state, p);
368 search(left, middle, repetitions, state, p);
369 }
370 return;
371 }
372
373 int
collect_sample(int repetitions,struct mem_state * state,struct cache_results * p)374 collect_sample(int repetitions, struct mem_state* state,
375 struct cache_results* p)
376 {
377 int i, modified, npages;
378 double baseline;
379
380 npages = (p->len + getpagesize() - 1) / getpagesize();
381 baseline = measure(p->len, repetitions, &p->variation, state);
382
383 if (npages > 1) {
384 for (i = 0, modified = 1; i < 8 && modified; ++i) {
385 modified = test_chunk(0, npages, npages,
386 state->pages, p->len,
387 &baseline, 0.0,
388 repetitions, state);
389 }
390 }
391 p->latency = baseline;
392
393 /*
394 fprintf(stderr, "collect_sample: len=%lu, latency=%G\n",
395 (unsigned long)p->len, p->latency);
396 /**/
397
398 return (p->latency > 0);
399 }
400
/*
 * Measure the load latency of a pointer chase covering the first
 * <size> bytes of the initialized region, visiting pages in the order
 * given by state->pages.  Returns the median over <repetitions> timed
 * runs (reported in the same nanosecond units main() prints);
 * *variation is set to median/minimum as a stability indicator, or
 * -1.0 when the minimum is zero.
 *
 * Assumes mem_initialize() has already built the intra-page chains.
 */
double
measure(size_t size, int repetitions,
	double* variation, struct mem_state* state)
{
	size_t i, j, npages, nlines;
	double time, median;
	char *p;
	result_t *r, *r_save;
	size_t *pages;

	pages = state->pages;
	npages = (size + getpagesize() - 1) / getpagesize();
	nlines = state->nlines;

	/* partial last page: only chase through the lines that fit */
	if (size % getpagesize())
		nlines = (size % getpagesize()) / state->line;

	r_save = get_results();
	r = (result_t*)malloc(sizeof_result(repetitions));
	if (!r) {
		perror("malloc");
		exit(3);
	}
	insertinit(r);

	/*
	 * assumes that you have used mem_initialize() to setup the memory
	 */
	p = state->base;
	/* link the last line of each page to the first line of the next */
	for (i = 0; i < npages - 1; ++i) {
		for (j = 0; j < state->nwords; ++j) {
			*(char**)(p + pages[i] + state->lines[state->nlines - 1] + state->words[j]) =
				p + pages[i+1] + state->lines[0] + state->words[j];
		}
	}
	/* close the cycle: the last page points back to the first,
	 * advancing one word so every word offset gets visited */
	for (j = 0; j < state->nwords; ++j) {
		*(char**)(p + pages[npages - 1] + state->lines[nlines - 1] + state->words[j]) =
			p + pages[0] + state->lines[0] + state->words[(j+1)%state->nwords];
	}

	/*
	check_memory(size, state);
	/**/

	addr_save = NULL;
	state->p[0] = p + pages[0] + state->lines[0] + state->words[0];
	/* now, run through the chain once to clear the cache */
	mem_benchmark((size / sizeof(char*) + 100) / 100, state);

	for (i = 0; i < repetitions; ++i) {
		BENCH1(mem_benchmark(__n, state); __n = 1;, 0)
		insertsort(gettime(), get_n(), r);
	}
	set_results(r);
	/* 100 loads per counted iteration, hence the /100 scaling */
	median = (1000. * (double)gettime()) / (100. * (double)get_n());

	save_minimum();
	time = (1000. * (double)gettime()) / (100. * (double)get_n());

	/* Are the results stable, or do they vary? */
	if (time != 0.)
		*variation = median / time;
	else
		*variation = -1.0;
	set_results(r_save);
	free(r);

	/* restore the intra-page link broken above when the last page
	 * was truncated -- presumably matching mem_initialize()'s
	 * layout; confirm against mem_initialize() */
	if (nlines < state->nlines) {
		for (j = 0; j < state->nwords; ++j) {
			*(char**)(p + pages[npages - 1] + state->lines[nlines - 1] + state->words[j]) =
				p + pages[npages - 1] + state->lines[nlines] + state->words[j];
		}
	}
	/*
	fprintf(stderr, "%.6f %.2f\n", size / (1000. * 1000.), median);
	/**/

	return median;
}
480
481
/*
 * Measure the chain latency with the pages of chunk [i, i+chunk)
 * removed: swap them to the tail of the page list, measure a region
 * shortened by chunk pages, then swap them back.  Returns the measured
 * latency; the page list is left exactly as it was found.
 */
double
remove_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
	size_t len, int repetitions, struct mem_state* state)
{
	size_t	j;
	double	latency, variation;
	int	swap_out = (i + chunk < npages);

	/* move the chunk's pages past the shortened region */
	if (swap_out) {
		for (j = 0; j < chunk; ++j)
			SWAP(pages[i+j], pages[npages-1-j]);
	}

	latency = measure(len - chunk * getpagesize(), repetitions,
			&variation, state);

	/* restore the original ordering */
	if (swap_out) {
		for (j = 0; j < chunk; ++j)
			SWAP(pages[i+j], pages[npages-1-j]);
	}

	return latency;
}
507
/*
 * Look for cache-conflicting pages in chunk [i, i+chunk) of the page
 * list: remove sub-chunks one at a time and re-measure; a sub-chunk
 * whose removal clearly speeds up the chain (>1% below *baseline)
 * contains a conflicting page and is recursed into.  Chunks of at
 * most 20 pages (and smaller than the whole list) are handed to
 * fixup_chunk(), which tries to replace the bad pages with spares.
 *
 * *baseline may be updated by fixup_chunk(); chunk_baseline is the
 * best latency seen with this chunk removed.  Returns nonzero if the
 * page list was modified.
 */
int
test_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
	size_t len, double *baseline, double chunk_baseline,
	int repetitions, struct mem_state* state)
{
	int modified = 0;
	int changed;
	size_t j, k, subchunk;
	double t, tt, nodiff_chunk_baseline;

	/* small enough: try to actually fix the pages */
	if (chunk <= 20 && chunk < npages) {
		return fixup_chunk(i, chunk, npages, pages, len, baseline,
				chunk_baseline, repetitions, state);
	}

	nodiff_chunk_baseline = *baseline;
	subchunk = (chunk + 19) / 20;	/* split into up to 20 sub-chunks */
	for (j = i, k = 0; j < i + chunk; j+=subchunk, k++) {
		if (j + subchunk > i + chunk) subchunk = i + chunk - j;

		/* latency with this sub-chunk's pages removed */
		t = remove_chunk(j, subchunk, npages, pages,
				len, repetitions, state);

		/*
		fprintf(stderr, "test_chunk(...): baseline=%G, t=%G, len=%d, chunk=%d, i=%d\n", *baseline, t, len, subchunk, j);
		/**/

		/* no clear speedup => nothing to fix here */
		if (t >= 0.99 * *baseline) continue;
		if (t >= 0.999 * nodiff_chunk_baseline) continue;

		/* re-measure to filter out flukes; keep the worse time */
		tt = remove_chunk(j, subchunk, npages, pages,
				len, repetitions, state);

		if (tt > t) t = tt;

		if (t >= 0.99 * *baseline) continue;
		if (t >= 0.999 * nodiff_chunk_baseline) continue;

		changed = test_chunk(j, subchunk, npages, pages, len,
				baseline, t, repetitions, state);

		if (changed) {
			modified = 1;
		} else {
			/* couldn't improve; remember this latency as the
			 * new "no difference" reference point */
			nodiff_chunk_baseline = t;
		}
	}
	return modified;
}
557
558 /*
559 * This routine is called once we have identified a chunk
560 * that has pages that are suspected of colliding with other
561 * pages.
562 *
563 * The algorithm is to remove all the pages, and then
564 * slowly add back pages; attempting to add pages with
565 * minimal cost.
566 */
567 int
fixup_chunk(size_t i,size_t chunk,size_t npages,size_t * pages,size_t len,double * baseline,double chunk_baseline,int repetitions,struct mem_state * state)568 fixup_chunk(size_t i, size_t chunk, size_t npages, size_t* pages,
569 size_t len, double *baseline, double chunk_baseline,
570 int repetitions, struct mem_state* state)
571 {
572 int swapped = 0;
573 size_t j, k;
574 size_t page, substitute, original;
575 size_t ntotalpages, nsparepages;
576 size_t subset_len;
577 size_t *pageset;
578 size_t *saved_pages;
579 static size_t available_index = 0;
580 double t, var, new_baseline;
581 double latencies[20];
582
583 ntotalpages = (state->maxlen + getpagesize() - 1)/ getpagesize();
584 nsparepages = ntotalpages - npages;
585 pageset = state->pages + npages;
586 new_baseline = *baseline;
587
588 saved_pages = (size_t*)malloc(sizeof(size_t) * ntotalpages);
589 if (!saved_pages) {
590 perror("malloc");
591 exit(4);
592 }
593 bcopy(pages, saved_pages, sizeof(int) * ntotalpages);
594
595 /* move everything to the end of the page list */
596 if (i + chunk < npages) {
597 for (j = 0; j < chunk; ++j) {
598 page = pages[i+j];
599 pages[i+j] = pages[npages-chunk+j];
600 pages[npages-chunk+j] = page;
601 }
602 }
603
604 if (available_index >= nsparepages) available_index = 0;
605
606 /*
607 * first try to identify which pages we can definitely keep
608 */
609 for (j = 0, k = chunk; j < k; ) {
610
611 t = measure((npages - chunk + j + 1) * getpagesize(),
612 repetitions, &var, state);
613
614 if (0.995 * t <= chunk_baseline) {
615 latencies[j] = t;
616 ++j; /* keep this page */
617 } else {
618 --k; /* this page is probably no good */
619 latencies[k] = t;
620 SWAP(pages[npages - chunk + j], pages[npages - chunk + k]);
621 }
622 }
623 /*
624 * sort the "bad" pages by increasing latency
625 */
626 pagesort(chunk - j, &pages[npages - chunk + j], &latencies[j]);
627
628 /*
629 fprintf(stderr, "fixup_chunk: len=%d, chunk=%d, j=%d, baseline=%G, lat[%d]=%G..%G\n", len, chunk, j, *baseline, j, (j < chunk ? latencies[j] : -1.0), latencies[chunk - 1]);
630 /**/
631
632 if (chunk >= npages && j < chunk / 2) {
633 j = chunk / 2;
634 t = measure((npages - chunk + j + 1) * getpagesize(),
635 repetitions, &var, state);
636 chunk_baseline = t;
637 }
638
639 for (k = 0; j < chunk && k < 2 * npages; ++k) {
640 original = npages - chunk + j;
641 substitute = nsparepages - 1;
642 substitute -= (k + available_index) % (nsparepages - 1);
643 subset_len = (original + 1) * getpagesize();
644 if (j == chunk - 1 && len % getpagesize()) {
645 subset_len = len;
646 }
647
648 SWAP(pages[original], pageset[substitute]);
649 t = measure(subset_len, repetitions, &var, state);
650 SWAP(pages[original], pageset[substitute]);
651
652 /*
653 * try to keep pages ordered by increasing latency
654 */
655 if (t < latencies[chunk - 1]) {
656 latencies[chunk - 1] = t;
657 SWAP(pages[npages - 1], pageset[substitute]);
658 pagesort(chunk - j,
659 &pages[npages - chunk + j], &latencies[j]);
660 }
661 if (0.995 * latencies[j] <= chunk_baseline) {
662 ++j; /* keep this page */
663 ++swapped;
664 }
665 }
666
667 available_index = (k + available_index) % (nsparepages - 1);
668
669 /* measure new baseline, in case we didn't manage to optimally
670 * replace every page
671 */
672 if (swapped) {
673 new_baseline = measure(len, repetitions, &var, state);
674
675 /*
676 fprintf(stderr, "fixup_chunk: len=%d, swapped=%d, k=%d, baseline=%G, newbase=%G\n", len, swapped, k, *baseline, new_baseline);
677 /**/
678
679 if (new_baseline >= 0.999 * *baseline) {
680 /* no benefit to these changes; back them out */
681 swapped = 0;
682 bcopy(saved_pages, pages, sizeof(int) * ntotalpages);
683 } else {
684 /* we sped up, so keep these changes */
685 *baseline = new_baseline;
686
687 /* move back to the middle of the pagelist */
688 if (i + chunk < npages) {
689 for (j = 0; j < chunk; ++j) {
690 page = pages[i+j];
691 pages[i+j] = pages[npages-chunk+j];
692 pages[npages-chunk+j] = page;
693 }
694 }
695 }
696 /*
697 } else {
698 fprintf(stderr, "fixup_chunk: len=%d, swapped=%d, k=%d\n", len, swapped, k);
699 /**/
700 }
701 free(saved_pages);
702
703 return swapped;
704 }
705
/*
 * Debugging aid: walk the pointer chain covering <size> bytes and
 * verify that every visited address falls on a known page, that the
 * chain has no short cycle, and that it visits the expected number of
 * words.  Problems are reported on stderr; nothing is modified.
 */
void
check_memory(size_t size, struct mem_state* state)
{
	size_t i, j, first_page, npages, nwords;
	size_t page, word_count, pagesize;
	off_t offset;
	char **p, **q;
	char **start;

	pagesize = getpagesize();
	npages = (size + pagesize - 1) / pagesize;
	nwords = size / sizeof(char*);

	/*
	fprintf(stderr, "check_memory(%d, ...): entering, %d words\n", size, nwords);
	/**/
	word_count = 1;
	first_page = 0;
	/* head of the chain, as laid out by measure() */
	start = (char**)(state->base + state->pages[0] + state->lines[0] + state->words[0]);
	for (q = p = (char**)*start; p != start; ) {
		word_count++;
		offset = (unsigned long)p - (unsigned long)state->base;
		page = offset - offset % pagesize;
		/* search for p's page, starting from the last match */
		for (j = first_page; j < npages; ++j) {
			if (page == state->pages[j]) break;
		}
		if (j == npages) {
			/* wrap around and search the earlier pages too */
			for (j = 0; j < first_page; ++j) {
				if (page == state->pages[j]) break;
			}
			if (j == first_page) {
				fprintf(stderr,
					"check_memory: bad memory reference for size %lu\n",
					(unsigned long)size);
			}
		}
		first_page = j % npages;
		p = (char**)*p;
		/* q advances at half p's speed; equal successors mean
		 * the chain loops before covering the whole region */
		if (word_count & 0x1) q = (char**)*q;
		if (*p == *q) {
			fprintf(stderr, "check_memory: unwanted memory cycle! page=%lu\n", (unsigned long)j);
			return;
		}
	}
	if (word_count != nwords) {
		fprintf(stderr, "check_memory: wrong word count, expected %lu, got %lu\n", (unsigned long)nwords, (unsigned long)word_count);
	}
	/*
	fprintf(stderr, "check_memory(%lu, ...): exiting\n", (unsigned long)size);
	/**/
}
757
/*
 * Sort pages[0..n-1] and latencies[0..n-1] together, in order of
 * increasing latency.  A simple exchange sort is fine: n is at most
 * 20 (see fixup_chunk()).
 *
 * Fixes: the old code used int counters against the size_t bound with
 * "i < n - 1", which wraps to SIZE_MAX when n == 0 and reads far out
 * of bounds.  Counters are now size_t and the bound is overflow-safe;
 * the element swap is written out inline so the routine has no macro
 * dependencies.
 */
void
pagesort(size_t n, size_t* pages, double* latencies)
{
	size_t	i, j;

	for (i = 0; i + 1 < n; ++i) {
		for (j = i + 1; j < n; ++j) {
			if (latencies[i] > latencies[j]) {
				double	t = latencies[i];
				size_t	pg = pages[i];

				latencies[i] = latencies[j];
				latencies[j] = t;
				pages[i] = pages[j];
				pages[j] = pg;
			}
		}
	}
}
775