// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// CPU profiling.
// Based on algorithms and data structures used in
// http://code.google.com/p/google-perftools/.
//
// The main difference between this code and the google-perftools
// code is that this code is written to allow copying the profile data
// to an arbitrary io.Writer, while the google-perftools code always
// writes to an operating system file.
//
// The signal handler for the profiling clock tick adds a new stack trace
// to a hash table tracking counts for recent traces. Most clock ticks
// hit in the cache. In the event of a cache miss, an entry must be
// evicted from the hash table, copied to a log that will eventually be
// written as profile data. The google-perftools code flushed the
// log itself during the signal handler. This code cannot do that, because
// the io.Writer might block or need system calls or locks that are not
// safe to use from within the signal handler. Instead, we split the log
// into two halves and let the signal handler fill one half while a goroutine
// is writing out the other half. When the signal handler fills its half, it
// offers to swap with the goroutine. If the writer is not done with its half,
// we lose the stack trace for this clock tick (and record that loss).
// The goroutine interacts with the signal handler by calling getprofile() to
// get the next log piece to write, implicitly handing back the last log
// piece it obtained.
//
// The state of this dance between the signal handler and the goroutine
// is encoded in the Profile.handoff field. If handoff == 0, then the goroutine
// is not using either log half and is waiting (or will soon be waiting) for
// a new piece by calling notesleep(&p->wait).
If the signal handler 34// changes handoff from 0 to non-zero, it must call notewakeup(&p->wait) 35// to wake the goroutine. The value indicates the number of entries in the 36// log half being handed off. The goroutine leaves the non-zero value in 37// place until it has finished processing the log half and then flips the number 38// back to zero. Setting the high bit in handoff means that the profiling is over, 39// and the goroutine is now in charge of flushing the data left in the hash table 40// to the log and returning that data. 41// 42// The handoff field is manipulated using atomic operations. 43// For the most part, the manipulation of handoff is orderly: if handoff == 0 44// then the signal handler owns it and can change it to non-zero. 45// If handoff != 0 then the goroutine owns it and can change it to zero. 46// If that were the end of the story then we would not need to manipulate 47// handoff using atomic operations. The operations are needed, however, 48// in order to let the log closer set the high bit to indicate "EOF" safely 49// in the situation when normally the goroutine "owns" handoff. 50 51package runtime 52#include "runtime.h" 53#include "arch.h" 54#include "malloc.h" 55 56#include "array.h" 57typedef struct __go_open_array Slice; 58#define array __values 59#define len __count 60#define cap __capacity 61 62enum 63{ 64 HashSize = 1<<10, 65 LogSize = 1<<17, 66 Assoc = 4, 67 MaxStack = 64, 68}; 69 70typedef struct Profile Profile; 71typedef struct Bucket Bucket; 72typedef struct Entry Entry; 73 74struct Entry { 75 uintptr count; 76 uintptr depth; 77 uintptr stack[MaxStack]; 78}; 79 80struct Bucket { 81 Entry entry[Assoc]; 82}; 83 84struct Profile { 85 bool on; // profiling is on 86 Note wait; // goroutine waits here 87 uintptr count; // tick count 88 uintptr evicts; // eviction count 89 uintptr lost; // lost ticks that need to be logged 90 91 // Active recent stack traces. 92 Bucket hash[HashSize]; 93 94 // Log of traces evicted from hash. 
95 // Signal handler has filled log[toggle][:nlog]. 96 // Goroutine is writing log[1-toggle][:handoff]. 97 uintptr log[2][LogSize/2]; 98 uintptr nlog; 99 int32 toggle; 100 uint32 handoff; 101 102 // Writer state. 103 // Writer maintains its own toggle to avoid races 104 // looking at signal handler's toggle. 105 uint32 wtoggle; 106 bool wholding; // holding & need to release a log half 107 bool flushing; // flushing hash table - profile is over 108 bool eod_sent; // special end-of-data record sent; => flushing 109}; 110 111static Lock lk; 112static Profile *prof; 113 114static void tick(uintptr*, int32); 115static void add(Profile*, uintptr*, int32); 116static bool evict(Profile*, Entry*); 117static bool flushlog(Profile*); 118 119static uintptr eod[3] = {0, 1, 0}; 120 121// LostProfileData is a no-op function used in profiles 122// to mark the number of profiling stack traces that were 123// discarded due to slow data writers. 124static void 125LostProfileData(void) 126{ 127} 128 129extern void runtime_SetCPUProfileRate(intgo) 130 __asm__ (GOSYM_PREFIX "runtime.SetCPUProfileRate"); 131 132// SetCPUProfileRate sets the CPU profiling rate. 133// The user documentation is in debug.go. 134void 135runtime_SetCPUProfileRate(intgo hz) 136{ 137 uintptr *p; 138 uintptr n; 139 140 // Clamp hz to something reasonable. 141 if(hz < 0) 142 hz = 0; 143 if(hz > 1000000) 144 hz = 1000000; 145 146 runtime_lock(&lk); 147 if(hz > 0) { 148 if(prof == nil) { 149 prof = runtime_SysAlloc(sizeof *prof, &mstats.other_sys); 150 if(prof == nil) { 151 runtime_printf("runtime: cpu profiling cannot allocate memory\n"); 152 runtime_unlock(&lk); 153 return; 154 } 155 } 156 if(prof->on || prof->handoff != 0) { 157 runtime_printf("runtime: cannot set cpu profile rate until previous profile has finished.\n"); 158 runtime_unlock(&lk); 159 return; 160 } 161 162 prof->on = true; 163 p = prof->log[0]; 164 // pprof binary header format. 
165 // http://code.google.com/p/google-perftools/source/browse/trunk/src/profiledata.cc#117 166 *p++ = 0; // count for header 167 *p++ = 3; // depth for header 168 *p++ = 0; // version number 169 *p++ = 1000000 / hz; // period (microseconds) 170 *p++ = 0; 171 prof->nlog = p - prof->log[0]; 172 prof->toggle = 0; 173 prof->wholding = false; 174 prof->wtoggle = 0; 175 prof->flushing = false; 176 prof->eod_sent = false; 177 runtime_noteclear(&prof->wait); 178 179 runtime_setcpuprofilerate(tick, hz); 180 } else if(prof != nil && prof->on) { 181 runtime_setcpuprofilerate(nil, 0); 182 prof->on = false; 183 184 // Now add is not running anymore, and getprofile owns the entire log. 185 // Set the high bit in prof->handoff to tell getprofile. 186 for(;;) { 187 n = prof->handoff; 188 if(n&0x80000000) 189 runtime_printf("runtime: setcpuprofile(off) twice"); 190 if(runtime_cas(&prof->handoff, n, n|0x80000000)) 191 break; 192 } 193 if(n == 0) { 194 // we did the transition from 0 -> nonzero so we wake getprofile 195 runtime_notewakeup(&prof->wait); 196 } 197 } 198 runtime_unlock(&lk); 199} 200 201static void 202tick(uintptr *pc, int32 n) 203{ 204 add(prof, pc, n); 205} 206 207// add adds the stack trace to the profile. 208// It is called from signal handlers and other limited environments 209// and cannot allocate memory or acquire locks that might be 210// held at the time of the signal, nor can it use substantial amounts 211// of stack. It is allowed to call evict. 212static void 213add(Profile *p, uintptr *pc, int32 n) 214{ 215 int32 i, j; 216 uintptr h, x; 217 Bucket *b; 218 Entry *e; 219 220 if(n > MaxStack) 221 n = MaxStack; 222 223 // Compute hash. 224 h = 0; 225 for(i=0; i<n; i++) { 226 h = h<<8 | (h>>(8*(sizeof(h)-1))); 227 x = pc[i]; 228 h += x*31 + x*7 + x*3; 229 } 230 p->count++; 231 232 // Add to entry count if already present in table. 
233 b = &p->hash[h%HashSize]; 234 for(i=0; i<Assoc; i++) { 235 e = &b->entry[i]; 236 if(e->depth != (uintptr)n) 237 continue; 238 for(j=0; j<n; j++) 239 if(e->stack[j] != pc[j]) 240 goto ContinueAssoc; 241 e->count++; 242 return; 243 ContinueAssoc:; 244 } 245 246 // Evict entry with smallest count. 247 e = &b->entry[0]; 248 for(i=1; i<Assoc; i++) 249 if(b->entry[i].count < e->count) 250 e = &b->entry[i]; 251 if(e->count > 0) { 252 if(!evict(p, e)) { 253 // Could not evict entry. Record lost stack. 254 p->lost++; 255 return; 256 } 257 p->evicts++; 258 } 259 260 // Reuse the newly evicted entry. 261 e->depth = n; 262 e->count = 1; 263 for(i=0; i<n; i++) 264 e->stack[i] = pc[i]; 265} 266 267// evict copies the given entry's data into the log, so that 268// the entry can be reused. evict is called from add, which 269// is called from the profiling signal handler, so it must not 270// allocate memory or block. It is safe to call flushLog. 271// evict returns true if the entry was copied to the log, 272// false if there was no room available. 273static bool 274evict(Profile *p, Entry *e) 275{ 276 int32 i, d, nslot; 277 uintptr *log, *q; 278 279 d = e->depth; 280 nslot = d+2; 281 log = p->log[p->toggle]; 282 if(p->nlog+nslot > nelem(p->log[0])) { 283 if(!flushlog(p)) 284 return false; 285 log = p->log[p->toggle]; 286 } 287 288 q = log+p->nlog; 289 *q++ = e->count; 290 *q++ = d; 291 for(i=0; i<d; i++) 292 *q++ = e->stack[i]; 293 p->nlog = q - log; 294 e->count = 0; 295 return true; 296} 297 298// flushlog tries to flush the current log and switch to the other one. 299// flushlog is called from evict, called from add, called from the signal handler, 300// so it cannot allocate memory or block. It can try to swap logs with 301// the writing goroutine, as explained in the comment at the top of this file. 
302static bool 303flushlog(Profile *p) 304{ 305 uintptr *log, *q; 306 307 if(!runtime_cas(&p->handoff, 0, p->nlog)) 308 return false; 309 runtime_notewakeup(&p->wait); 310 311 p->toggle = 1 - p->toggle; 312 log = p->log[p->toggle]; 313 q = log; 314 if(p->lost > 0) { 315 *q++ = p->lost; 316 *q++ = 1; 317 *q++ = (uintptr)LostProfileData; 318 p->lost = 0; 319 } 320 p->nlog = q - log; 321 return true; 322} 323 324// getprofile blocks until the next block of profiling data is available 325// and returns it as a []byte. It is called from the writing goroutine. 326Slice 327getprofile(Profile *p) 328{ 329 uint32 i, j, n; 330 Slice ret; 331 Bucket *b; 332 Entry *e; 333 334 ret.array = nil; 335 ret.len = 0; 336 ret.cap = 0; 337 338 if(p == nil) 339 return ret; 340 341 if(p->wholding) { 342 // Release previous log to signal handling side. 343 // Loop because we are racing against SetCPUProfileRate(0). 344 for(;;) { 345 n = p->handoff; 346 if(n == 0) { 347 runtime_printf("runtime: phase error during cpu profile handoff\n"); 348 return ret; 349 } 350 if(n & 0x80000000) { 351 p->wtoggle = 1 - p->wtoggle; 352 p->wholding = false; 353 p->flushing = true; 354 goto flush; 355 } 356 if(runtime_cas(&p->handoff, n, 0)) 357 break; 358 } 359 p->wtoggle = 1 - p->wtoggle; 360 p->wholding = false; 361 } 362 363 if(p->flushing) 364 goto flush; 365 366 if(!p->on && p->handoff == 0) 367 return ret; 368 369 // Wait for new log. 370 runtime_notetsleepg(&p->wait, -1); 371 runtime_noteclear(&p->wait); 372 373 n = p->handoff; 374 if(n == 0) { 375 runtime_printf("runtime: phase error during cpu profile wait\n"); 376 return ret; 377 } 378 if(n == 0x80000000) { 379 p->flushing = true; 380 goto flush; 381 } 382 n &= ~0x80000000; 383 384 // Return new log to caller. 385 p->wholding = true; 386 387 ret.array = (byte*)p->log[p->wtoggle]; 388 ret.len = n*sizeof(uintptr); 389 ret.cap = ret.len; 390 return ret; 391 392flush: 393 // In flush mode. 394 // Add is no longer being called. We own the log. 
395 // Also, p->handoff is non-zero, so flushlog will return false. 396 // Evict the hash table into the log and return it. 397 for(i=0; i<HashSize; i++) { 398 b = &p->hash[i]; 399 for(j=0; j<Assoc; j++) { 400 e = &b->entry[j]; 401 if(e->count > 0 && !evict(p, e)) { 402 // Filled the log. Stop the loop and return what we've got. 403 goto breakflush; 404 } 405 } 406 } 407breakflush: 408 409 // Return pending log data. 410 if(p->nlog > 0) { 411 // Note that we're using toggle now, not wtoggle, 412 // because we're working on the log directly. 413 ret.array = (byte*)p->log[p->toggle]; 414 ret.len = p->nlog*sizeof(uintptr); 415 ret.cap = ret.len; 416 p->nlog = 0; 417 return ret; 418 } 419 420 // Made it through the table without finding anything to log. 421 if(!p->eod_sent) { 422 // We may not have space to append this to the partial log buf, 423 // so we always return a new slice for the end-of-data marker. 424 p->eod_sent = true; 425 ret.array = (byte*)eod; 426 ret.len = sizeof eod; 427 ret.cap = ret.len; 428 return ret; 429 } 430 431 // Finally done. Clean up and return nil. 432 p->flushing = false; 433 if(!runtime_cas(&p->handoff, p->handoff, 0)) 434 runtime_printf("runtime: profile flush racing with something\n"); 435 return ret; // set to nil at top of function 436} 437 438// CPUProfile returns the next cpu profile block as a []byte. 439// The user documentation is in debug.go. 440func CPUProfile() (ret Slice) { 441 ret = getprofile(prof); 442} 443