/*
 * MBWTEST.C
 *
 * (c)Copyright 2003 Matthew Dillon.  This code is hereby placed in the public
 * domain.
 *
 * Attempt to figure out the L1 and L2 cache sizes and measure memory
 * bandwidth for the L1 and L2 cache and for non-cache memory.
 *
 * $DragonFly: src/test/sysperf/mbwtest.c,v 1.1 2003/11/13 07:10:36 dillon Exp $
 */

#include <sys/file.h>
#include <sys/time.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
#include <unistd.h>

/* Largest working-set size probed; the test buffer is twice this size. */
#define MAXBYTES	(16*1024*1024)

static int bandwidth_test(char *buf, int loops, int bytes, const char *msg);
static void start_timing(void);
static int stop_timing(const char *str, long long bytes);

/*
 * Probe the cache hierarchy by timing read passes over progressively larger
 * working sets, then print the detected sizes and the bandwidth measured at
 * each level.
 *
 * Returns 0 on success, 1 if the test buffer cannot be allocated.
 */
int
main(int ac, char **av)
{
    char *buf;
    int loops;
    int us1;
    int us2;
    long long count1;
    long long count2;
    long long count3;
    int bytes1;
    int bytes2;

    (void)ac;
    (void)av;

    buf = malloc(MAXBYTES * 2);
    if (buf == NULL) {
        perror("malloc");
        return(1);
    }
    bzero(buf, MAXBYTES * 2);

    /*
     * Get a baseline for 1/4 second L1 cache timing maximizing the number
     * of loops.  The minimum L1 cache size is 4K.
     *
     * stop_timing() never returns less than 1, so the divisions by us1
     * and us2 below cannot divide by zero.
     */
    start_timing();
    us1 = bandwidth_test(buf, 1000, 4096, NULL);    /* uS per 1000 loops */
    loops = (int)(1000000LL * 1000 / 4 / us1);      /* loops for 1/4 sec */
    count1 = loops * 4096LL;                        /* bytes read per run */
    start_timing();
    us1 = bandwidth_test(buf, loops, 4096, NULL);   /* best case timing */
    printf("."); fflush(stdout); usleep(1000000 / 4);

    /*
     * Search for the L1 cache size.  Look for a 20% difference in bandwidth
     * while keeping the total number of bytes read roughly constant.
     */
    bzero(buf, 4096);
    start_timing();
    us1 = bandwidth_test(buf, (int)(count1 / 4096 + 20), 4096, NULL);
    for (bytes1 = 8192; bytes1 < MAXBYTES; bytes1 <<= 1) {
        start_timing();
        us2 = bandwidth_test(buf, (int)(count1 / bytes1 + 20), bytes1, NULL);
        if (us2 > us1 + us1 / 5)
            break;
    }
    bytes1 >>= 1;                   /* actual L1 cache size */
    count2 = count1 * us1 / us2;    /* scale byte count to the slower level */
    printf("."); fflush(stdout); usleep(1000000 / 4);

    /*
     * Search for the L2 cache size the same way, starting just above the
     * L1 size.  If the loop body never runs, us2 keeps the value left by
     * the L1 search.
     */
    bytes2 = bytes1 << 1;
    bzero(buf, bytes2);
    start_timing();
    us1 = bandwidth_test(buf, (int)(count2 / bytes2 + 20), bytes2, NULL);
    for (bytes2 <<= 1; bytes2 < MAXBYTES; bytes2 <<= 1) {
        start_timing();
        us2 = bandwidth_test(buf, (int)(count2 / bytes2 + 20), bytes2, NULL);
        if (us2 > us1 + us1 / 5)
            break;
    }
    count3 = count2 * us1 / us2;
    bytes2 >>= 1;                   /* actual L2 cache size */

    /*
     * Final run to generate output
     *
     * NOTE(review): bytes2 has already been halved when it is compared with
     * MAXBYTES, so a search that reaches MAXBYTES without seeing a slowdown
     * reports an L2 size of MAXBYTES / 2 rather than "No L2 cache found".
     * Confirm whether that is intended before changing it.
     */
    printf("\nL1 cache size: %d\n", bytes1);
    if (bytes2 == MAXBYTES)
        printf("L2 cache size: No L2 cache found\n");
    else
        printf("L2 cache size: %d\n", bytes2);
    sleep(1);
    start_timing();
    bandwidth_test(buf, (int)(count1 / bytes1 + 20), bytes1,
        "L1 cache bandwidth");
    if (bytes2 != MAXBYTES) {
        start_timing();
        bandwidth_test(buf, (int)(count2 / bytes2 + 20), bytes2,
            "L2 cache bandwidth");
    }

    /*
     * Set bytes2 to exceed the L2 cache size.  The result is at most
     * MAXBYTES * 2, which is the size of the buffer.
     */
    bytes2 <<= 1;
    if (bytes2 < MAXBYTES)
        bytes2 <<= 1;
    start_timing();
    bandwidth_test(buf, (int)(count3 / bytes2 + 20), bytes2,
        "non-cache bandwidth");

    free(buf);
    return(0);
}

/* Start and stop timestamps shared by start_timing() and stop_timing(). */
struct timeval tv1;
struct timeval tv2;

/*
 * Read the first `bytes` bytes of `buf` `loops` times, 32 bytes per inner
 * iteration as eight int-sized loads, then stop the timer.
 *
 * The caller must call start_timing() first.  `bytes` is expected to be a
 * multiple of 32 (every caller passes a power of two >= 4096); otherwise
 * the last inner iteration reads past buf + bytes.
 *
 * If `msg` is non-NULL the measured bandwidth is printed with that label.
 * Returns the elapsed time in microseconds as reported by stop_timing().
 */
static
int
bandwidth_test(char *buf, int loops, int bytes, const char *msg)
{
    char *bptr;
    char *lptr;
    int j;

    lptr = buf + bytes;
    for (j = 0; j < loops; ++j) {
        for (bptr = buf; bptr < lptr; bptr += 32) {
            /*
             * The volatile qualifier forces each load to be issued even
             * though the loaded value is discarded.
             */
            (void)*(volatile int *)(bptr + 0);
            (void)*(volatile int *)(bptr + 4);
            (void)*(volatile int *)(bptr + 8);
            (void)*(volatile int *)(bptr + 12);
            (void)*(volatile int *)(bptr + 16);
            (void)*(volatile int *)(bptr + 20);
            (void)*(volatile int *)(bptr + 24);
            (void)*(volatile int *)(bptr + 28);
        }
    }
    return(stop_timing(msg, (long long)bytes * loops));
}

/*
 * Record the current time of day in tv1 as the start of a measurement.
 */
static
void
start_timing(void)
{
    gettimeofday(&tv1, NULL);
}

/*
 * Record the current time of day in tv2 and return the microseconds elapsed
 * since the last start_timing() call.
 *
 * The result is clamped to a minimum of 1 so that callers can divide by it
 * safely even when the interval is shorter than the clock resolution or the
 * wall clock stepped backwards.
 *
 * If `str` is non-NULL, print "<str>: <rate> Mbytes/sec" where the rate is
 * `bytes` over the elapsed time, with 1 Mbyte = 1024 * 1024 bytes.
 */
static
int
stop_timing(const char *str, long long bytes)
{
    int us;

    gettimeofday(&tv2, NULL);

    us = tv2.tv_usec + 1000000 - tv1.tv_usec +
        (tv2.tv_sec - tv1.tv_sec - 1) * 1000000;
    if (us < 1)
        us = 1;
    if (str) {
        printf("%s: %4.2f Mbytes/sec\n",
            str,
            (double)bytes * 1000000.0 / ((double)us * 1024.0 * 1024.0));
    }
    return(us);
}