// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Malloc profiling.
// Patterned after tcmalloc's algorithms; shorter code.

package runtime
#include "runtime.h"
#include "arch.h"
#include "malloc.h"
#include "defs.h"
#include "go-type.h"
#include "go-string.h"

// NOTE(rsc): Everything here could use cas if contention became an issue.
// proflock serializes access to the bucket hash table, the per-type
// bucket lists, and all per-bucket statistics below.
static Lock proflock;

// All memory allocations are local and do not escape outside of the profiler.
// The profiler is forbidden from referring to garbage-collected memory.

enum { MProf, BProf };	// profile types

// Per-call-stack profiling information.
// Lookup by hashing call stack into a linked-list hash table.
struct Bucket
{
	Bucket	*next;		// next in hash list
	Bucket	*allnext;	// next in list of all mbuckets/bbuckets
	int32	typ;
	// Generally unions can break precise GC,
	// this one is fine because it does not contain pointers.
	union
	{
		struct	// typ == MProf
		{
			// The following complex 3-stage scheme of stats accumulation
			// is required to obtain a consistent picture of mallocs and frees
			// for some point in time.
			// The problem is that mallocs come in real time, while frees
			// come only after a GC during concurrent sweeping. So if we would
			// naively count them, we would get a skew toward mallocs.
			//
			// Mallocs are accounted in recent stats.
			// Explicit frees are accounted in recent stats.
			// GC frees are accounted in prev stats.
			// After GC prev stats are added to final stats and
			// recent stats are moved into prev stats.

			// Final (published) statistics, read by MemProfile.
			uintptr	allocs;
			uintptr	frees;
			uintptr	alloc_bytes;
			uintptr	free_bytes;

			uintptr	prev_allocs;	// since last but one till last gc
			uintptr	prev_frees;
			uintptr	prev_alloc_bytes;
			uintptr	prev_free_bytes;

			uintptr	recent_allocs;	// since last gc till now
			uintptr	recent_frees;
			uintptr	recent_alloc_bytes;
			uintptr	recent_free_bytes;

		};
		struct	// typ == BProf
		{
			int64	count;	// number of sampled blocking events
			int64	cycles;	// total CPU ticks spent blocked
		};
	};
	uintptr	hash;	// hash of size + stk
	uintptr	size;
	uintptr	nstk;	// number of entries in stk
	Location stk[1];	// call stack; actual length nstk, allocated past the struct
};
enum {
	BuckHashSize = 179999,
};
static Bucket **buckhash;	// hash table, lazily allocated on first use
static Bucket *mbuckets;	// memory profile buckets
static Bucket *bbuckets;	// blocking profile buckets
static uintptr bucketmem;	// bytes of persistent memory consumed by buckets

// Return the bucket for stk[0:nstk], allocating new bucket if needed.
// Returns nil when alloc is false and no matching bucket exists.
// Caller must hold proflock.
static Bucket*
stkbucket(int32 typ, uintptr size, Location *stk, int32 nstk, bool alloc)
{
	int32 i, j;
	uintptr h;
	Bucket *b;

	if(buckhash == nil) {
		buckhash = runtime_SysAlloc(BuckHashSize*sizeof buckhash[0], &mstats.buckhash_sys);
		if(buckhash == nil)
			runtime_throw("runtime: cannot allocate memory");
	}

	// Hash stack: shift/add/xor mixing of each frame's pc.
	h = 0;
	for(i=0; i<nstk; i++) {
		h += stk[i].pc;
		h += h<<10;
		h ^= h>>6;
	}
	// hash in size
	h += size;
	h += h<<10;
	h ^= h>>6;
	// finalize
	h += h<<3;
	h ^= h>>11;

	// Look for an existing bucket with identical type, size and stack.
	// Equality compares pc, line number, and file name of every frame.
	i = h%BuckHashSize;
	for(b = buckhash[i]; b; b=b->next) {
		if(b->typ == typ && b->hash == h && b->size == size && b->nstk == (uintptr)nstk) {
			for(j = 0; j < nstk; j++) {
				if(b->stk[j].pc != stk[j].pc ||
				   b->stk[j].lineno != stk[j].lineno ||
				   !__go_strings_equal(b->stk[j].filename, stk[j].filename))
					break;
			}
			if (j == nstk)
				return b;
		}
	}

	if(!alloc)
		return nil;

	// Allocate a new bucket (persistent; buckets are never freed) and
	// link it into both the hash chain and the per-type "all" list.
	b = runtime_persistentalloc(sizeof *b + nstk*sizeof stk[0], 0, &mstats.buckhash_sys);
	bucketmem += sizeof *b + nstk*sizeof stk[0];
	runtime_memmove(b->stk, stk, nstk*sizeof stk[0]);
	b->typ = typ;
	b->hash = h;
	b->size = size;
	b->nstk = nstk;
	b->next = buckhash[i];
	buckhash[i] = b;
	if(typ == MProf) {
		b->allnext = mbuckets;
		mbuckets = b;
	} else {
		b->allnext = bbuckets;
		bbuckets = b;
	}
	return b;
}

// Advance the 3-stage stats pipeline described on Bucket:
// prev stats are folded into the final stats, recent stats become
// prev stats, and recent stats are cleared.
// Caller must hold proflock.
static void
MProf_GC(void)
{
	Bucket *b;

	for(b=mbuckets; b; b=b->allnext) {
		b->allocs += b->prev_allocs;
		b->frees += b->prev_frees;
		b->alloc_bytes += b->prev_alloc_bytes;
		b->free_bytes += b->prev_free_bytes;

		b->prev_allocs = b->recent_allocs;
		b->prev_frees = b->recent_frees;
		b->prev_alloc_bytes = b->recent_alloc_bytes;
		b->prev_free_bytes = b->recent_free_bytes;

		b->recent_allocs = 0;
		b->recent_frees = 0;
		b->recent_alloc_bytes = 0;
		b->recent_free_bytes = 0;
	}
}

// Record that a gc just happened: all the 'recent' statistics are now real.
void
runtime_MProf_GC(void)
{
	runtime_lock(&proflock);
	MProf_GC();
	runtime_unlock(&proflock);
}

// Called by malloc to record a profiled block.
void
runtime_MProf_Malloc(void *p, uintptr size)
{
	Location stk[32];
	Bucket *b;
	int32 nstk;

	// Attribute the allocation to the caller's stack (skip this frame).
	nstk = runtime_callers(1, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(MProf, size, stk, nstk, true);
	b->recent_allocs++;
	b->recent_alloc_bytes += size;
	runtime_unlock(&proflock);

	// Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock.
	// This reduces potential contention and chances of deadlocks.
	// Since the object must be alive during call to MProf_Malloc,
	// it's fine to do this non-atomically.
	runtime_setprofilebucket(p, b);
}

// Called when freeing a profiled block.
// freed==true means an explicit free (counted in recent stats);
// freed==false means the block was reclaimed by the GC (counted in
// prev stats), per the accounting scheme described on Bucket.
void
runtime_MProf_Free(Bucket *b, uintptr size, bool freed)
{
	runtime_lock(&proflock);
	if(freed) {
		b->recent_frees++;
		b->recent_free_bytes += size;
	} else {
		b->prev_frees++;
		b->prev_free_bytes += size;
	}
	runtime_unlock(&proflock);
}

int64 runtime_blockprofilerate;	// in CPU ticks

void runtime_SetBlockProfileRate(intgo) __asm__ (GOSYM_PREFIX "runtime.SetBlockProfileRate");

// Set the blocking-profile sampling threshold. The incoming rate is in
// nanoseconds and is converted to CPU ticks, the unit runtime_blockevent
// compares against. rate <= 0 disables block profiling.
void
runtime_SetBlockProfileRate(intgo rate)
{
	int64 r;

	if(rate <= 0)
		r = 0;	// disable profiling
	else {
		// convert ns to cycles, use float64 to prevent overflow during multiplication
		r = (float64)rate*runtime_tickspersecond()/(1000*1000*1000);
		if(r == 0)
			r = 1;
	}
	runtime_atomicstore64((uint64*)&runtime_blockprofilerate, r);
}

// Record a blocking event that lasted the given number of CPU ticks.
// Events at least as long as the rate are always recorded; shorter ones
// are sampled with probability roughly cycles/rate.
void
runtime_blockevent(int64 cycles, int32 skip)
{
	int32 nstk;
	int64 rate;
	Location stk[32];
	Bucket *b;

	if(cycles <= 0)
		return;
	rate = runtime_atomicload64((uint64*)&runtime_blockprofilerate);
	if(rate <= 0 || (rate > cycles && runtime_fastrand1()%rate > cycles))
		return;

	nstk = runtime_callers(skip, stk, nelem(stk), false);
	runtime_lock(&proflock);
	b = stkbucket(BProf, 0, stk, nstk, true);
	b->count++;
	b->cycles += cycles;
	runtime_unlock(&proflock);
}

// Go interface to profile data. (Declared in debug.go)

// Must match MemProfileRecord in debug.go.
typedef struct Record Record;
struct Record {
	int64 alloc_bytes, free_bytes;
	int64 alloc_objects, free_objects;
	uintptr stk[32];
};

// Write b's data to r.
static void
record(Record *r, Bucket *b)
{
	uint32 i;

	r->alloc_bytes = b->alloc_bytes;
	r->free_bytes = b->free_bytes;
	r->alloc_objects = b->allocs;
	r->free_objects = b->frees;
	// Copy up to nelem(r->stk) frames; zero-fill the remainder.
	for(i=0; i<b->nstk && i<nelem(r->stk); i++)
		r->stk[i] = b->stk[i].pc;
	for(; i<nelem(r->stk); i++)
		r->stk[i] = 0;
}

func MemProfile(p Slice, include_inuse_zero bool) (n int, ok bool) {
	Bucket *b;
	Record *r;
	bool clear;

	runtime_lock(&proflock);
	// First pass: count eligible buckets and detect whether any final
	// stats exist at all.
	n = 0;
	clear = true;
	for(b=mbuckets; b; b=b->allnext) {
		if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
			n++;
		if(b->allocs != 0 || b->frees != 0)
			clear = false;
	}
	if(clear) {
		// Absolutely no data, suggesting that a garbage collection
		// has not yet happened. In order to allow profiling when
		// garbage collection is disabled from the beginning of execution,
		// accumulate stats as if a GC just happened, and recount buckets.
		// Two pipeline steps flush recent -> prev -> final.
		MProf_GC();
		MProf_GC();
		n = 0;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				n++;
	}
	// Only fill in records if the caller's slice can hold all of them.
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (Record*)p.__values;
		for(b=mbuckets; b; b=b->allnext)
			if(include_inuse_zero || b->alloc_bytes != b->free_bytes)
				record(r++, b);
	}
	runtime_unlock(&proflock);
}

// Enqueue the bucket list heads as GC roots so the collector marks
// through the profiler's bucket lists.
void
runtime_MProf_Mark(struct Workbuf **wbufp, void (*enqueue1)(struct Workbuf**, Obj))
{
	// buckhash is not allocated via mallocgc.
	enqueue1(wbufp, (Obj){(byte*)&mbuckets, sizeof mbuckets, 0});
	enqueue1(wbufp, (Obj){(byte*)&bbuckets, sizeof bbuckets, 0});
}

// Invoke callback for every memory-profile bucket while holding proflock.
void
runtime_iterate_memprof(void (*callback)(Bucket*, uintptr, Location*, uintptr, uintptr, uintptr))
{
	Bucket *b;

	runtime_lock(&proflock);
	for(b=mbuckets; b; b=b->allnext) {
		callback(b, b->nstk, b->stk, b->size, b->allocs, b->frees);
	}
	runtime_unlock(&proflock);
}

// Must match BlockProfileRecord in debug.go.
typedef struct BRecord BRecord;
struct BRecord {
	int64 count;
	int64 cycles;
	uintptr stk[32];
};

func BlockProfile(p Slice) (n int, ok bool) {
	Bucket *b;
	BRecord *r;
	int32 i;

	runtime_lock(&proflock);
	n = 0;
	for(b=bbuckets; b; b=b->allnext)
		n++;
	// Only fill in records if the caller's slice can hold all of them.
	ok = false;
	if(n <= p.__count) {
		ok = true;
		r = (BRecord*)p.__values;
		for(b=bbuckets; b; b=b->allnext, r++) {
			r->count = b->count;
			r->cycles = b->cycles;
			// Copy up to nelem(r->stk) frames; zero-fill the remainder.
			for(i=0; (uintptr)i<b->nstk && (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = b->stk[i].pc;
			for(; (uintptr)i<nelem(r->stk); i++)
				r->stk[i] = 0;
		}
	}
	runtime_unlock(&proflock);
}

// Must match StackRecord in debug.go.
377typedef struct TRecord TRecord; 378struct TRecord { 379 uintptr stk[32]; 380}; 381 382func ThreadCreateProfile(p Slice) (n int, ok bool) { 383 TRecord *r; 384 M *first, *mp; 385 int32 i; 386 387 first = runtime_atomicloadp(&runtime_allm); 388 n = 0; 389 for(mp=first; mp; mp=mp->alllink) 390 n++; 391 ok = false; 392 if(n <= p.__count) { 393 ok = true; 394 r = (TRecord*)p.__values; 395 for(mp=first; mp; mp=mp->alllink) { 396 for(i = 0; (uintptr)i < nelem(r->stk); i++) { 397 r->stk[i] = mp->createstack[i].pc; 398 } 399 r++; 400 } 401 } 402} 403 404func Stack(b Slice, all bool) (n int) { 405 byte *pc; 406 bool enablegc = false; 407 408 pc = (byte*)(uintptr)runtime_getcallerpc(&b); 409 410 if(all) { 411 runtime_semacquire(&runtime_worldsema, false); 412 runtime_m()->gcing = 1; 413 runtime_stoptheworld(); 414 enablegc = mstats.enablegc; 415 mstats.enablegc = false; 416 } 417 418 if(b.__count == 0) 419 n = 0; 420 else{ 421 G* g = runtime_g(); 422 g->writebuf = (byte*)b.__values; 423 g->writenbuf = b.__count; 424 USED(pc); 425 runtime_goroutineheader(g); 426 runtime_traceback(); 427 runtime_printcreatedby(g); 428 if(all) 429 runtime_tracebackothers(g); 430 n = b.__count - g->writenbuf; 431 g->writebuf = nil; 432 g->writenbuf = 0; 433 } 434 435 if(all) { 436 runtime_m()->gcing = 0; 437 mstats.enablegc = enablegc; 438 runtime_semrelease(&runtime_worldsema); 439 runtime_starttheworld(); 440 } 441} 442 443static void 444saveg(G *gp, TRecord *r) 445{ 446 int32 n, i; 447 Location locstk[nelem(r->stk)]; 448 449 if(gp == runtime_g()) { 450 n = runtime_callers(0, locstk, nelem(r->stk), false); 451 for(i = 0; i < n; i++) 452 r->stk[i] = locstk[i].pc; 453 } 454 else { 455 // FIXME: Not implemented. 
456 n = 0; 457 } 458 if((size_t)n < nelem(r->stk)) 459 r->stk[n] = 0; 460} 461 462func GoroutineProfile(b Slice) (n int, ok bool) { 463 uintptr i; 464 TRecord *r; 465 G *gp; 466 467 ok = false; 468 n = runtime_gcount(); 469 if(n <= b.__count) { 470 runtime_semacquire(&runtime_worldsema, false); 471 runtime_m()->gcing = 1; 472 runtime_stoptheworld(); 473 474 n = runtime_gcount(); 475 if(n <= b.__count) { 476 G* g = runtime_g(); 477 ok = true; 478 r = (TRecord*)b.__values; 479 saveg(g, r++); 480 for(i = 0; i < runtime_allglen; i++) { 481 gp = runtime_allg[i]; 482 if(gp == g || gp->status == Gdead) 483 continue; 484 saveg(gp, r++); 485 } 486 } 487 488 runtime_m()->gcing = 0; 489 runtime_semrelease(&runtime_worldsema); 490 runtime_starttheworld(); 491 } 492} 493 494// Tracing of alloc/free/gc. 495 496static Lock tracelock; 497 498static const char* 499typeinfoname(int32 typeinfo) 500{ 501 if(typeinfo == TypeInfo_SingleObject) 502 return "single object"; 503 else if(typeinfo == TypeInfo_Array) 504 return "array"; 505 else if(typeinfo == TypeInfo_Chan) 506 return "channel"; 507 runtime_throw("typinfoname: unknown type info"); 508 return nil; 509} 510 511void 512runtime_tracealloc(void *p, uintptr size, uintptr typ) 513{ 514 const char *name; 515 Type *type; 516 517 runtime_lock(&tracelock); 518 runtime_m()->traceback = 2; 519 type = (Type*)(typ & ~3); 520 name = typeinfoname(typ & 3); 521 if(type == nil) 522 runtime_printf("tracealloc(%p, %p, %s)\n", p, size, name); 523 else 524 runtime_printf("tracealloc(%p, %p, %s of %S)\n", p, size, name, *type->__reflection); 525 if(runtime_m()->curg == nil || runtime_g() == runtime_m()->curg) { 526 runtime_goroutineheader(runtime_g()); 527 runtime_traceback(); 528 } else { 529 runtime_goroutineheader(runtime_m()->curg); 530 runtime_traceback(); 531 } 532 runtime_printf("\n"); 533 runtime_m()->traceback = 0; 534 runtime_unlock(&tracelock); 535} 536 537void 538runtime_tracefree(void *p, uintptr size) 539{ 540 runtime_lock(&tracelock); 
541 runtime_m()->traceback = 2; 542 runtime_printf("tracefree(%p, %p)\n", p, size); 543 runtime_goroutineheader(runtime_g()); 544 runtime_traceback(); 545 runtime_printf("\n"); 546 runtime_m()->traceback = 0; 547 runtime_unlock(&tracelock); 548} 549 550void 551runtime_tracegc(void) 552{ 553 runtime_lock(&tracelock); 554 runtime_m()->traceback = 2; 555 runtime_printf("tracegc()\n"); 556 // running on m->g0 stack; show all non-g0 goroutines 557 runtime_tracebackothers(runtime_g()); 558 runtime_printf("end tracegc\n"); 559 runtime_printf("\n"); 560 runtime_m()->traceback = 0; 561 runtime_unlock(&tracelock); 562} 563