/*
 * MBWTEST.C
 *
 * (c)Copyright 2003 Matthew Dillon.  This code is hereby placed in the public
 * domain.
 *
 * Attempt to figure out the L1, L2 and L3 cache sizes and measure memory
 * bandwidth for each cache level found and for non-cache memory.
 *
 * $DragonFly: src/test/sysperf/mbwtest.c,v 1.1 2003/11/13 07:10:36 dillon Exp $
 */

#include <sys/file.h>
#include <sys/time.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#define MAXBYTES    (16*1024*1024)      /* upper bound of the size search */
#define BUFBYTES    (MAXBYTES * 2)      /* size of the test buffer */

static int find_cache_size(char *buf, int start, long long count,
                           long long *next_count);
static int bandwidth_test(char *buf, int loops, int bytes, const char *msg);
static void start_timing(void);
static int stop_timing(const char *str, long long bytes);

/*
 * Probe for up to three cache levels, print the detected sizes, then print
 * the read bandwidth measured at each detected level and at a working set
 * larger than the last level.
 *
 * Returns 0 on success, 1 if the test buffer cannot be allocated.
 */
int
main(int ac, char **av)
{
    char *buf;
    int loops;
    int us1;
    long long count1;
    long long count2;
    long long count3;
    long long count4;
    int bytes1;
    int bytes2;
    int bytes3;
    int bytes4;

    (void)ac;
    (void)av;

    buf = malloc(BUFBYTES);
    if (buf == NULL) {
        perror("malloc");
        return(1);
    }
    memset(buf, 0, BUFBYTES);

    /*
     * Get a baseline for 1/4 second L1 cache timing maximizing the number
     * of loops.  The minimum L1 cache size is 4K.
     *
     * stop_timing() never returns less than 1, so the division is safe.
     */
    start_timing();
    us1 = bandwidth_test(buf, 1000, 4096, NULL);    /* uS per 1000 loops */
    loops = (int)(1000000LL * 1000 / 4 / us1);      /* loops for 1/4 sec */
    count1 = loops * 4096LL;
    start_timing();
    (void)bandwidth_test(buf, loops, 4096, NULL);   /* best case timing */
    printf("."); fflush(stdout); usleep(1000000 / 4);

    /*
     * Search for the L1 cache size.  Look for a 20% difference in bandwidth
     */
    bytes1 = find_cache_size(buf, 4096, count1, &count2);
    printf("."); fflush(stdout); usleep(1000000 / 4);

    /*
     * Search for the L2 and L3 cache sizes, each starting at twice the
     * size detected for the previous level.
     */
    bytes2 = find_cache_size(buf, bytes1 << 1, count2, &count3);
    bytes3 = find_cache_size(buf, bytes2 << 1, count3, &count4);

    /*
     * Final run to generate output.  A detected size of MAXBYTES or more
     * means the search for that level never got to run past its baseline,
     * i.e. no such cache level was found.
     */
    printf("\nL1 cache size: %d\n", bytes1);

    if (bytes2 >= MAXBYTES)
        printf("L2 cache size: No L2 cache found\n");
    else
        printf("L2 cache size: %d\n", bytes2);

    if (bytes3 >= MAXBYTES)
        printf("L3 cache size: No L3 cache found\n");
    else
        printf("L3 cache size: %d\n", bytes3);

    sleep(1);
    start_timing();
    bandwidth_test(buf, (int)(count1 / bytes1 + 20), bytes1,
                   "L1 cache bandwidth");
    if (bytes2 < MAXBYTES) {
        start_timing();
        bandwidth_test(buf, (int)(count2 / bytes2 + 20), bytes2,
                       "L2 cache bandwidth");
    }
    if (bytes3 < MAXBYTES) {
        start_timing();
        bandwidth_test(buf, (int)(count3 / bytes3 + 20), bytes3,
                       "L3 cache bandwidth");
    }

    /*
     * Set bytes4 to exceed the largest size examined above, but never
     * beyond the end of the allocated buffer.
     */
    bytes4 = bytes3 << 1;
    if (bytes4 < MAXBYTES)
        bytes4 <<= 1;
    if (bytes4 > BUFBYTES)
        bytes4 = BUFBYTES;
    start_timing();
    bandwidth_test(buf, (int)(count4 / bytes4 + 20), bytes4,
                   "non-cache bandwidth");

    free(buf);
    return(0);
}

/*
 * Time a baseline run over a working set of 'start' bytes, then keep
 * doubling the working set until a run takes more than 20% longer than the
 * baseline or the working set reaches MAXBYTES.
 *
 * buf        - test buffer; 'start' must not exceed BUFBYTES
 * start      - working set size in bytes for the baseline run
 * count      - approximate number of bytes to read per timed run
 * next_count - receives 'count' scaled by baseline time / last measured
 *              time (unchanged from 'count' if no larger size was tried)
 *
 * Returns half of the working set size at which the search stopped.
 */
static
int
find_cache_size(char *buf, int start, long long count, long long *next_count)
{
    int bytes;
    int us_base;
    int us_cur;

    memset(buf, 0, (size_t)start);
    start_timing();
    us_base = bandwidth_test(buf, (int)(count / start + 20), start, NULL);

    /*
     * Seed the comparison value so the scaling below is neutral when the
     * loop body never executes (start << 1 already >= MAXBYTES).
     */
    us_cur = us_base;
    for (bytes = start << 1; bytes < MAXBYTES; bytes <<= 1) {
        start_timing();
        us_cur = bandwidth_test(buf, (int)(count / bytes + 20), bytes, NULL);
        if (us_cur > us_base + us_base / 5)
            break;
    }
    *next_count = count * us_base / us_cur;     /* us_cur is always >= 1 */
    return(bytes >> 1);
}

/*
 * Timestamps shared by start_timing() and stop_timing().
 */
struct timeval tv1;
struct timeval tv2;

/*
 * Read the first 'bytes' bytes of 'buf' 'loops' times, one int at a time in
 * 32-byte strides, then stop the timer.  The caller must have called
 * start_timing() first.  'bytes' is expected to be a multiple of 32 (all
 * callers pass powers of two >= 4096).
 *
 * If 'msg' is non-NULL the measured bandwidth is printed with that label.
 *
 * Returns the elapsed time in microseconds as reported by stop_timing().
 */
static
int
bandwidth_test(char *buf, int loops, int bytes, const char *msg)
{
    register char *bptr;
    register char *lptr;
    register int v = 0;
    int j;

    lptr = buf + bytes;
    for (j = 0; j < loops; ++j) {
        for (bptr = buf; bptr < lptr; bptr += 32) {
            /* volatile accesses so the loads cannot be optimized away */
            v = *(volatile int *)(bptr + 0);
            v = *(volatile int *)(bptr + 4);
            v = *(volatile int *)(bptr + 8);
            v = *(volatile int *)(bptr + 12);
            v = *(volatile int *)(bptr + 16);
            v = *(volatile int *)(bptr + 20);
            v = *(volatile int *)(bptr + 24);
            v = *(volatile int *)(bptr + 28);
        }
    }
    (void)v;        /* value is irrelevant, only the loads matter */
    return(stop_timing(msg, (long long)bytes * loops));
}

/*
 * Record the current time of day in tv1 as the start of a timed run.
 */
static
void
start_timing(void)
{
    gettimeofday(&tv1, NULL);
}

/*
 * Record the current time of day in tv2 and compute the microseconds
 * elapsed since the last start_timing().  If 'str' is non-NULL, print the
 * bandwidth for 'bytes' bytes transferred in that time, labelled with 'str'.
 *
 * The result is clamped to the range [1, INT_MAX] so that callers can
 * divide by it even if the interval was too short to measure or the
 * time-of-day clock was stepped backwards during the run.
 */
static
int
stop_timing(const char *str, long long bytes)
{
    long long us;

    gettimeofday(&tv2, NULL);

    us = (long long)(tv2.tv_sec - tv1.tv_sec) * 1000000LL +
         (long long)(tv2.tv_usec - tv1.tv_usec);
    if (us < 1)
        us = 1;
    else if (us > INT_MAX)
        us = INT_MAX;

    if (str) {
        printf("%s: %4.2f Mbytes/sec\n",
            str,
            (double)bytes * 1000000.0 / ((double)us * 1024.0 * 1024.0));
    }
    return((int)us);
}