/* dzl-counter.h
 *
 * Copyright (C) 2013-2015 Christian Hergert <christian@hergert.me>
 *
 * This file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This file is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Additionally, this file does not claim copyright over the expansion
 * of macros in your source program.
 */

#ifndef DZL_COUNTER_H
#define DZL_COUNTER_H

#include <glib-object.h>

#include "dzl-version-macros.h"

/*
 * History
 * =======
 *
 * DzlCounter is a performance counter based on ideas from previous work
 * on high performance counters. They are not guaranteed to be 100%
 * correct, but they approach that with no synchronization given new
 * enough hardware. In particular, we use %ecx from rdtscp (the core id)
 * to determine which cacheline to increment the counter within.
 *
 * Given a counter, the value will be split up into NCPU cachelines where
 * NCPU is the number of cores returned from get_nprocs() (on Linux).
 *
 * Updating the counter is very cheap, reading back the counter requires
 * a volatile read of each cacheline. Again, no correctness is guaranteed.
 *
 * In practice, very few values are lost even during tight competing loops.
 * A loss can happen when the thread is pre-empted between the %rdtscp
 * instruction and the addq increment (on x86_64).
 *
 *
 * Using DzlCounter
 * ================
 *
 * To define a counter, you must have support for constructor attributes.
 *
 *   DZL_DEFINE_COUNTER (Symbol, "Category", "Name", "Description")
 *
 * To increment the counter in a function of your choice (but within the
 * same module), use DZL_COUNTER_ADD, DZL_COUNTER_INC, DZL_COUNTER_DEC.
 *
 *   DZL_COUNTER_INC (Symbol);
 *
 *
 * Architecture Support
 * ====================
 *
 * If you are not on x86_64, or are missing the rdtscp instruction, a 64-bit
 * atomic will be performed using __sync_fetch_and_add8(). Clearly, we can
 * do some more work here to abstract which implementation is used, but we
 * only support GCC and Clang today, which both have that intrinsic. Some
 * architectures may not have it (such as 32-bit PPC), but I'm not too
 * concerned about that at the moment.
 *
 * The counters are mapped into a shared memory zone using shm_open() and
 * mmap(). An external program can then discover the available counters
 * and print them without blocking the target program. It simply must
 * perform the reads in a volatile manner just like the target process
 * would need to do for readback.
 *
 * DzlCounterArena provides a helper to walk through the counters in the
 * shared memory zone: dzl_counter_arena_foreach().
 *
 * You cannot remove a counter once it has been registered.
 *
 *
 * Accessing Counters Remotely
 * ===========================
 *
 * You can access the counters from out of process. By opening the SHM zone
 * and reading the contents from each cacheline, you can get the approximate
 * state of the target application without blocking it.
 *
 * DzlCounterArena provides a helper for you to do this.
 *
 *   DzlCounterArena *arena;
 *
 *   arena = dzl_counter_arena_new_for_pid (other_process_pid);
 *   dzl_counter_arena_foreach (arena, my_counter_callback, user_data);
 *
 *
 * Data Layout
 * ===========
 *
 * The layout of the shared memory zone is broken into "cells". Each cell
 * is an approximate cacheline (64-bytes) on modern Intel hardware. Indexes
 * to data locations are represented in cells to simplify the math and
 * allow the compiler to know we are working with properly aligned structures.
 *
 * The base pointer in DzlCounter.values is not 64-byte aligned! It is 8-byte
 * aligned and points to the offset within the cacheline for that counter.
 * We pack 8 64-bit counters into a single cacheline. This allows us to avoid
 * an extra MOV instruction when incrementing since we only need to perform
 * the offset from the base pointer.
 *
 * The first two cells are the header, which contains information about the
 * underlying shm file and how large the mmap() range should be.
 *
 * After that, begin the counters.
 *
 * The counters are laid out in groups of 8 counters.
 *
 *   [8 CounterInfo Structs (128-bytes each)][N_CPU Data Zones (64-byte each)]
 *
 * See dzl-counter.c for more information on the contents of these structures.
 *
 *
 * Build System Requirements
 * =========================
 *
 * We need to know if rdtscp is available at compile time. In an effort
 * to keep the headers as portable as possible (if that matters here?) we
 * require that you define DZL_HAVE_RDTSCP if the instruction is supported.
132 * 133 * An example for autoconf might be similar to the following: 134 * 135 * AC_MSG_CHECKING([for fast counters with rdtscp]) 136 * AC_RUN_IFELSE( 137 * [AC_LANG_SOURCE([[ 138 * #include <x86intrin.h> 139 * int main (int argc, char *argv[]) { int cpu; __builtin_ia32_rdtscp (&cpu); return 0; }]])], 140 * [have_rdtscp=yes], 141 * [have_rdtscp=no]) 142 * AC_MSG_RESULT([$have_rdtscp]) 143 * AS_IF([test "$have_rdtscp" = "yes"], 144 * [CFLAGS="$CFLAGS -DDZL_HAVE_RDTSCP"]) 145 */ 146 147 G_BEGIN_DECLS 148 149 #ifdef DZL_HAVE_RDTSCP 150 # include <x86intrin.h> 151 static inline guint dzl_get_current_cpu_rdtscp(void)152 dzl_get_current_cpu_rdtscp (void) 153 { 154 /* 155 * This extracts the IA32_TSC_AUX into the ecx register. On Linux, 156 * that value contains a value with the bottom 12 bits being the 157 * cpu identifier, and the next 10 bits being the node group. 158 */ 159 guint aux; 160 __builtin_ia32_rdtscp (&aux); 161 return aux & 0xFFF; 162 } 163 # define dzl_get_current_cpu() dzl_get_current_cpu_rdtscp() 164 #elif defined(__linux__) 165 # define dzl_get_current_cpu() dzl_get_current_cpu_call() 166 #elif defined(__powerpc__) && !defined(__powerpc64__) 167 # define dzl_get_current_cpu() 0 168 # undef DZL_COUNTER_REQUIRES_ATOMIC 169 #else 170 # define dzl_get_current_cpu() 0 171 # define DZL_COUNTER_REQUIRES_ATOMIC 1 172 #endif 173 174 /** 175 * DZL_DEFINE_COUNTER: 176 * @Identifier: The symbol name of the counter 177 * @Category: A string category for the counter. 178 * @Name: A string name for the counter. 179 * @Description: A string description for the counter. 
180 * 181 * |[<!-- language="C" --> 182 * DZL_DEFINE_COUNTER (my_counter, "My", "Counter", "My Counter Description"); 183 * ]| 184 */ 185 #define DZL_DEFINE_COUNTER(Identifier, Category, Name, Description) \ 186 static DzlCounter Identifier##_ctr = { NULL, Category, Name, Description }; \ 187 static void Identifier##_ctr_init (void) __attribute__((constructor)); \ 188 static void \ 189 Identifier##_ctr_init (void) \ 190 { \ 191 dzl_counter_arena_register (dzl_counter_arena_get_default(), &Identifier##_ctr); \ 192 } 193 194 /** 195 * DZL_COUNTER_INC: 196 * @Identifier: The identifier of the counter. 197 * 198 * Increments the counter @Identifier by 1. 199 */ 200 #define DZL_COUNTER_INC(Identifier) DZL_COUNTER_ADD(Identifier, G_GINT64_CONSTANT(1)) 201 202 /** 203 * DZL_COUNTER_DEC: 204 * @Identifier: The identifier of the counter. 205 * 206 * Decrements the counter @Identifier by 1. 207 */ 208 #define DZL_COUNTER_DEC(Identifier) DZL_COUNTER_SUB(Identifier, G_GINT64_CONSTANT(1)) 209 210 /** 211 * DZL_COUNTER_SUB: 212 * @Identifier: The identifier of the counter. 213 * @Count: the amount to subtract. 214 * 215 * Subtracts from the counter identified by @Identifier by @Count. 216 */ 217 #define DZL_COUNTER_SUB(Identifier, Count) DZL_COUNTER_ADD(Identifier, (-(Count))) 218 219 /** 220 * DZL_COUNTER_ADD: 221 * @Identifier: The identifier of the counter. 222 * @Count: the amount to add to the counter. 223 * 224 * Adds @Count to @Identifier. 225 * 226 * This operation is not guaranteed to have full correctness. It tries to find 227 * a happy medium between fast, and accurate. When possible, the %rdtscp 228 * instruction is used to get a cacheline owned by the executing CPU, to avoid 229 * collisions. However, this is not guaranteed as the thread could be swapped 230 * between the calls to %rdtscp and %addq (on 64-bit Intel). 231 * 232 * Other platforms have fallbacks which may give different guarantees, such as 233 * using atomic operations (and therefore, memory barriers). 
234 * 235 * See #DzlCounter for more information. 236 */ 237 #ifdef DZL_COUNTER_REQUIRES_ATOMIC 238 # define DZL_COUNTER_ADD(Identifier, Count) \ 239 G_STMT_START { \ 240 __sync_add_and_fetch ((gint64 *)&Identifier##_ctr.values[0], ((gint64)(Count))); \ 241 } G_STMT_END 242 #else 243 # define DZL_COUNTER_ADD(Identifier, Count) \ 244 G_STMT_START { \ 245 Identifier##_ctr.values[dzl_get_current_cpu()].value += ((gint64)(Count)); \ 246 } G_STMT_END 247 #endif 248 249 typedef struct _DzlCounter DzlCounter; 250 typedef struct _DzlCounterArena DzlCounterArena; 251 typedef struct _DzlCounterValue DzlCounterValue; 252 253 /** 254 * DzlCounterForeachFunc: 255 * @counter: the counter. 256 * @user_data: data supplied to dzl_counter_arena_foreach(). 257 * 258 * Function prototype for callbacks provided to dzl_counter_arena_foreach(). 259 */ 260 typedef void (*DzlCounterForeachFunc) (DzlCounter *counter, 261 gpointer user_data); 262 263 struct _DzlCounter 264 { 265 /*< Private >*/ 266 DzlCounterValue *values; 267 const gchar *category; 268 const gchar *name; 269 const gchar *description; 270 } __attribute__ ((aligned(8))); 271 272 struct _DzlCounterValue 273 { 274 volatile gint64 value; 275 gint64 padding [7]; 276 } __attribute__ ((aligned(8))); 277 278 DZL_AVAILABLE_IN_ALL 279 GType dzl_counter_arena_get_type (void); 280 DZL_AVAILABLE_IN_ALL 281 guint dzl_get_current_cpu_call (void); 282 DZL_AVAILABLE_IN_ALL 283 DzlCounterArena *dzl_counter_arena_get_default (void); 284 DZL_AVAILABLE_IN_ALL 285 DzlCounterArena *dzl_counter_arena_new_for_pid (GPid pid); 286 DZL_AVAILABLE_IN_ALL 287 DzlCounterArena *dzl_counter_arena_ref (DzlCounterArena *arena); 288 DZL_AVAILABLE_IN_ALL 289 void dzl_counter_arena_unref (DzlCounterArena *arena); 290 DZL_AVAILABLE_IN_ALL 291 void dzl_counter_arena_register (DzlCounterArena *arena, 292 DzlCounter *counter); 293 DZL_AVAILABLE_IN_ALL 294 void dzl_counter_arena_foreach (DzlCounterArena *arena, 295 DzlCounterForeachFunc func, 296 gpointer user_data); 
297 DZL_AVAILABLE_IN_ALL 298 void dzl_counter_reset (DzlCounter *counter); 299 DZL_AVAILABLE_IN_ALL 300 gint64 dzl_counter_get (DzlCounter *counter); 301 302 G_END_DECLS 303 304 #endif /* DZL_COUNTER_H */ 305