1 /*- 2 * Copyright (c) 2014 Hudson River Trading LLC 3 * Written by: John H. Baldwin <jhb@FreeBSD.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/cpuset.h> 32 #include <machine/atomic.h> 33 #include <machine/cpu.h> 34 #include <machine/cpufunc.h> 35 #include <assert.h> 36 #include <err.h> 37 #include <errno.h> 38 #include <math.h> 39 #include <pthread.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <stdio.h> 43 44 #define barrier() __asm __volatile("" ::: "memory") 45 46 #define TESTS 1024 47 48 static volatile int gate; 49 static volatile uint64_t thread_tsc; 50 51 /* Bind the current thread to the specified CPU. */ 52 static void 53 bind_cpu(int cpu) 54 { 55 cpuset_t set; 56 57 CPU_ZERO(&set); 58 CPU_SET(cpu, &set); 59 if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set), 60 &set) < 0) 61 err(1, "cpuset_setaffinity(%d)", cpu); 62 } 63 64 static void * 65 thread_main(void *arg) 66 { 67 int cpu, i; 68 69 cpu = (intptr_t)arg; 70 bind_cpu(cpu); 71 for (i = 0; i < TESTS; i++) { 72 gate = 1; 73 while (gate == 1) 74 cpu_spinwait(); 75 barrier(); 76 77 __asm __volatile("lfence"); 78 thread_tsc = rdtsc(); 79 80 barrier(); 81 gate = 3; 82 while (gate == 3) 83 cpu_spinwait(); 84 } 85 return (NULL); 86 } 87 88 int 89 main(int ac __unused, char **av __unused) 90 { 91 cpuset_t all_cpus; 92 int64_t **skew, *aveskew, *minskew, *maxskew; 93 float *stddev; 94 double sumsq; 95 pthread_t child; 96 uint64_t tsc; 97 int *cpus; 98 int error, i, j, ncpu; 99 100 /* 101 * Find all the CPUs this program is eligible to run on and use 102 * this as our global set. This means you can use cpuset to 103 * restrict this program to only run on a subset of CPUs. 104 */ 105 if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, 106 sizeof(all_cpus), &all_cpus) < 0) 107 err(1, "cpuset_getaffinity"); 108 for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) { 109 if (CPU_ISSET(i, &all_cpus)) 110 ncpu++; 111 } 112 if (ncpu < 2) 113 errx(1, "Only one available CPU"); 114 cpus = calloc(ncpu, sizeof(*cpus)); 115 skew = calloc(ncpu, sizeof(*skew)); 116 for (i = 0; i < ncpu; i++) 117 skew[i] = calloc(TESTS, sizeof(*skew[i])); 118 for (i = 0, j = 0; i < CPU_SETSIZE; i++) 119 if (CPU_ISSET(i, &all_cpus)) { 120 assert(j < ncpu); 121 cpus[j] = i; 122 j++; 123 } 124 125 /* 126 * We bind this thread to the first CPU and then bind all the 127 * other threads to other CPUs in turn saving TESTS counts of 128 * skew calculations. 129 */ 130 bind_cpu(cpus[0]); 131 for (i = 1; i < ncpu; i++) { 132 error = pthread_create(&child, NULL, thread_main, 133 (void *)(intptr_t)cpus[i]); 134 if (error) 135 errc(1, error, "pthread_create"); 136 137 for (j = 0; j < TESTS; j++) { 138 while (gate != 1) 139 cpu_spinwait(); 140 gate = 2; 141 barrier(); 142 143 tsc = rdtsc(); 144 145 barrier(); 146 while (gate != 3) 147 cpu_spinwait(); 148 gate = 4; 149 150 skew[i][j] = thread_tsc - tsc; 151 } 152 153 error = pthread_join(child, NULL); 154 if (error) 155 errc(1, error, "pthread_join"); 156 } 157 158 /* 159 * Compute average skew for each CPU and output a summary of 160 * the results. 161 */ 162 aveskew = calloc(ncpu, sizeof(*aveskew)); 163 minskew = calloc(ncpu, sizeof(*minskew)); 164 maxskew = calloc(ncpu, sizeof(*maxskew)); 165 stddev = calloc(ncpu, sizeof(*stddev)); 166 stddev[0] = 0.0; 167 for (i = 1; i < ncpu; i++) { 168 sumsq = 0; 169 minskew[i] = maxskew[i] = skew[i][0]; 170 for (j = 0; j < TESTS; j++) { 171 aveskew[i] += skew[i][j]; 172 if (skew[i][j] < minskew[i]) 173 minskew[i] = skew[i][j]; 174 if (skew[i][j] > maxskew[i]) 175 maxskew[i] = skew[i][j]; 176 sumsq += (skew[i][j] * skew[i][j]); 177 } 178 aveskew[i] /= TESTS; 179 sumsq /= TESTS; 180 sumsq -= aveskew[i] * aveskew[i]; 181 stddev[i] = sqrt(sumsq); 182 } 183 184 printf("CPU | TSC skew (min/avg/max/stddev)\n"); 185 printf("----+------------------------------\n"); 186 for (i = 0; i < ncpu; i++) 187 printf("%3d | %5jd %5jd %5jd %6.3f\n", cpus[i], 188 (intmax_t)minskew[i], (intmax_t)aveskew[i], 189 (intmax_t)maxskew[i], stddev[i]); 190 return (0); 191 } 192