1 /*
2 Title: Profiling
3 Author: Dave Matthews, Cambridge University Computer Laboratory
4
5 Copyright (c) 2000-7
6 Cambridge University Technical Services Limited
7 Further development copyright (c) David C.J. Matthews 2011, 2015, 2020
8
9 This library is free software; you can redistribute it and/or
10 modify it under the terms of the GNU Lesser General Public
11 License version 2.1 as published by the Free Software Foundation.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
22 */
23
24 #ifdef HAVE_CONFIG_H
25 #include "config.h"
26 #elif defined(_WIN32)
27 #include "winconfig.h"
28 #else
29 #error "No configuration file"
30 #endif
31
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35
36 #ifdef HAVE_MALLOC_H
37 #include <malloc.h>
38 #endif
39
40 #ifdef HAVE_ASSERT_H
41 #include <assert.h>
42 #define ASSERT(x) assert(x)
43 #else
44 #define ASSERT(x) 0
45 #endif
46
47 #include "globals.h"
48 #include "arb.h"
49 #include "processes.h"
50 #include "polystring.h"
51 #include "profiling.h"
52 #include "save_vec.h"
53 #include "rts_module.h"
54 #include "memmgr.h"
55 #include "scanaddrs.h"
56 #include "locking.h"
57 #include "run_time.h"
58 #include "sys.h"
59 #include "rtsentry.h"
60
61 extern "C" {
62 POLYEXTERNALSYMBOL POLYUNSIGNED PolyProfiling(FirstArgument threadId, PolyWord mode);
63 }
64
65 static long mainThreadCounts[MTP_MAXENTRY];
66 static const char* const mainThreadText[MTP_MAXENTRY] =
67 {
68 "UNKNOWN",
69 "GARBAGE COLLECTION (sharing phase)",
70 "GARBAGE COLLECTION (mark phase)",
71 "GARBAGE COLLECTION (copy phase)",
72 "GARBAGE COLLECTION (update phase)",
73 "GARBAGE COLLECTION (minor collection)",
74 "Common data sharing",
75 "Exporting",
76 "Saving state",
77 "Loading saved state",
78 "Profiling",
79 "Setting signal handler",
80 "Cygwin spawn",
81 "Storing module",
82 "Loading module"
83 };
84
85 // Entries for store profiling
86 enum _extraStore {
87 EST_CODE = 0,
88 EST_STRING,
89 EST_BYTE,
90 EST_WORD,
91 EST_MUTABLE,
92 EST_MUTABLEBYTE,
93 EST_MAX_ENTRY
94 };
95
96 static POLYUNSIGNED extraStoreCounts[EST_MAX_ENTRY];
97 static const char * const extraStoreText[EST_MAX_ENTRY] =
98 {
99 "Function code",
100 "Strings",
101 "Byte data (long precision ints etc)",
102 "Unidentified word data",
103 "Unidentified mutable data",
104 "Mutable byte data (profiling counts)"
105 };
106
107 // Poly strings for "standard" counts. These are generated from the C strings
108 // above the first time profiling is activated.
109 static PolyWord psRTSString[MTP_MAXENTRY], psExtraStrings[EST_MAX_ENTRY], psGCTotal;
110
111 ProfileMode profileMode;
112 // If we are just profiling a single thread, this is the thread data.
113 static TaskData *singleThreadProfile = 0;
114
115 // The queue is processed every 400ms and an entry can be
116 // added every ms of CPU time by each thread.
117 #define PCQUEUESIZE 4000
118
119 static long queuePtr = 0;
120 static POLYCODEPTR pcQueue[PCQUEUESIZE];
121
122 // Increment, returning the original value.
incrAtomically(long & p)123 static int incrAtomically(long & p)
124 {
125 #if (defined(HAVE_SYNC_FETCH))
126 return __sync_fetch_and_add(&p, 1);
127 #elif (defined(_WIN32))
128 long newValue = InterlockedIncrement(&p);
129 return newValue - 1;
130 #else
131 return p++;
132 #endif
133 }
134
135 // Decrement and return new value.
decrAtomically(long & p)136 static int decrAtomically(long & p)
137 {
138 #if (defined(HAVE_SYNC_FETCH))
139 return __sync_sub_and_fetch(&p, 1);
140 #elif (defined(_WIN32))
141 return InterlockedDecrement(&p);
142 #else
143 return --p;
144 #endif
145 }
146
147 typedef struct _PROFENTRY
148 {
149 POLYUNSIGNED count;
150 PolyWord functionName;
151 struct _PROFENTRY *nextEntry;
152 } PROFENTRY, *PPROFENTRY;
153
154 class ProfileRequest: public MainThreadRequest
155 {
156 public:
ProfileRequest(unsigned prof,TaskData * pTask)157 ProfileRequest(unsigned prof, TaskData *pTask):
158 MainThreadRequest(MTP_PROFILING), mode(prof), pCallingThread(pTask), pTab(0), errorMessage(0) {}
159 ~ProfileRequest();
160 virtual void Perform();
161 Handle extractAsList(TaskData *taskData);
162
163 private:
164 void getResults(void);
165 void getProfileResults(PolyWord *bottom, PolyWord *top);
166 PPROFENTRY newProfileEntry(void);
167
168 private:
169 unsigned mode;
170 TaskData *pCallingThread;
171 PPROFENTRY pTab;
172
173 public:
174 const char *errorMessage;
175 };
176
~ProfileRequest()177 ProfileRequest::~ProfileRequest()
178 {
179 PPROFENTRY p = pTab;
180 while (p != 0)
181 {
182 PPROFENTRY toFree = p;
183 p = p->nextEntry;
184 free(toFree);
185 }
186 }
187
188 // Lock to serialise updates of counts. Only used during update.
189 // Not required when we print the counts since there's only one thread
190 // running then.
191 static PLock countLock;
192
193 // Get the profile object associated with a piece of code. Returns null if
194 // there isn't one, in particular if this is in the old format.
getProfileObjectForCode(PolyObject * code)195 static PolyObject *getProfileObjectForCode(PolyObject *code)
196 {
197 ASSERT(code->IsCodeObject());
198 PolyWord *consts;
199 POLYUNSIGNED constCount;
200 code->GetConstSegmentForCode(consts, constCount);
201 if (constCount < 2 || consts[1].AsUnsigned() == 0 || ! consts[1].IsDataPtr()) return 0;
202 PolyObject *profObject = consts[1].AsObjPtr();
203 if (profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1)
204 return profObject;
205 else return 0;
206 }
207
208 // Adds incr to the profile count for the function pointed at by
209 // pc or by one of its callers.
addSynchronousCount(POLYCODEPTR fpc,POLYUNSIGNED incr)210 void addSynchronousCount(POLYCODEPTR fpc, POLYUNSIGNED incr)
211 {
212 // Check that the pc value is within the heap. It could be
213 // in the assembly code.
214 PolyObject *codeObj = gMem.FindCodeObject(fpc);
215 if (codeObj)
216 {
217 PolyObject *profObject = getProfileObjectForCode(codeObj);
218 PLocker locker(&countLock);
219 if (profObject)
220 profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + incr));
221 return;
222 }
223 // Didn't find it.
224 {
225 PLocker locker(&countLock);
226 incrAtomically(mainThreadCounts[MTP_USER_CODE]);
227 }
228 }
229
230
231 // newProfileEntry - Make a new entry in the list
newProfileEntry(void)232 PPROFENTRY ProfileRequest::newProfileEntry(void)
233 {
234 PPROFENTRY newEntry = (PPROFENTRY)malloc(sizeof(PROFENTRY));
235 if (newEntry == 0) { errorMessage = "Insufficient memory"; return 0; }
236 newEntry->nextEntry = pTab;
237 pTab = newEntry;
238 return newEntry;
239 }
240
241 // We don't use ScanAddress here because we're only interested in the
242 // objects themselves not the addresses in them.
243 // We have to build the list of results in C memory rather than directly in
244 // ML memory because we can't allocate in ML memory in the root thread.
getProfileResults(PolyWord * bottom,PolyWord * top)245 void ProfileRequest::getProfileResults(PolyWord *bottom, PolyWord *top)
246 {
247 PolyWord *ptr = bottom;
248
249 while (ptr < top)
250 {
251 ptr++; // Skip the length word
252 PolyObject *obj = (PolyObject*)ptr;
253 if (obj->ContainsForwardingPtr())
254 {
255 // This used to be necessary when code objects were held in the
256 // general heap. Now that we only ever scan code and permanent
257 // areas it's probably not needed.
258 while (obj->ContainsForwardingPtr())
259 obj = obj->GetForwardingPtr();
260 ASSERT(obj->ContainsNormalLengthWord());
261 ptr += obj->Length();
262 }
263 else
264 {
265 ASSERT(obj->ContainsNormalLengthWord());
266
267 if (obj->IsCodeObject())
268 {
269 PolyWord *firstConstant = obj->ConstPtrForCode();
270 PolyWord name = firstConstant[0];
271 PolyObject *profCount = getProfileObjectForCode(obj);
272 if (profCount)
273 {
274 POLYUNSIGNED count = profCount->Get(0).AsUnsigned();
275
276 if (count != 0)
277 {
278 if (name != TAGGED(0))
279 {
280 PPROFENTRY pEnt = newProfileEntry();
281 if (pEnt == 0) return;
282 pEnt->count = count;
283 pEnt->functionName = name;
284 }
285
286 profCount->Set(0, PolyWord::FromUnsigned(0));
287 }
288 }
289 } /* code object */
290 ptr += obj->Length();
291 } /* else */
292 } /* while */
293 }
294
getResults(void)295 void ProfileRequest::getResults(void)
296 // Print profiling information and reset profile counts.
297 {
298 for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
299 {
300 MemSpace *space = *i;
301 // Permanent areas are filled with objects from the bottom.
302 getProfileResults(space->bottom, space->top); // Bottom to top
303 }
304 for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
305 {
306 CodeSpace *space = *i;
307 getProfileResults(space->bottom, space->top);
308 }
309
310 {
311 POLYUNSIGNED gc_count =
312 mainThreadCounts[MTP_GCPHASESHARING]+
313 mainThreadCounts[MTP_GCPHASEMARK]+
314 mainThreadCounts[MTP_GCPHASECOMPACT] +
315 mainThreadCounts[MTP_GCPHASEUPDATE] +
316 mainThreadCounts[MTP_GCQUICK];
317 if (gc_count)
318 {
319 PPROFENTRY pEnt = newProfileEntry();
320 if (pEnt == 0) return; // Report insufficient memory?
321 pEnt->count = gc_count;
322 pEnt->functionName = psGCTotal;
323 }
324 }
325
326 for (unsigned k = 0; k < MTP_MAXENTRY; k++)
327 {
328 if (mainThreadCounts[k])
329 {
330 PPROFENTRY pEnt = newProfileEntry();
331 if (pEnt == 0) return; // Report insufficient memory?
332 pEnt->count = mainThreadCounts[k];
333 pEnt->functionName = psRTSString[k];
334 mainThreadCounts[k] = 0;
335 }
336 }
337
338 for (unsigned l = 0; l < EST_MAX_ENTRY; l++)
339 {
340 if (extraStoreCounts[l])
341 {
342 PPROFENTRY pEnt = newProfileEntry();
343 if (pEnt == 0) return; // Report insufficient memory?
344 pEnt->count = extraStoreCounts[l];
345 pEnt->functionName = psExtraStrings[l];
346 extraStoreCounts[l] = 0;
347 }
348 }
349 }
350
351 // Extract the accumulated results as an ML list of pairs of the count and the string.
extractAsList(TaskData * taskData)352 Handle ProfileRequest::extractAsList(TaskData *taskData)
353 {
354 Handle saved = taskData->saveVec.mark();
355 Handle list = taskData->saveVec.push(ListNull);
356
357 for (PPROFENTRY p = pTab; p != 0; p = p->nextEntry)
358 {
359 Handle pair = alloc_and_save(taskData, 2);
360 Handle countValue = Make_arbitrary_precision(taskData, p->count);
361 pair->WordP()->Set(0, countValue->Word());
362 pair->WordP()->Set(1, p->functionName);
363 Handle next = alloc_and_save(taskData, sizeof(ML_Cons_Cell) / sizeof(PolyWord));
364 DEREFLISTHANDLE(next)->h = pair->Word();
365 DEREFLISTHANDLE(next)->t =list->Word();
366
367 taskData->saveVec.reset(saved);
368 list = taskData->saveVec.push(next->Word());
369 }
370
371 return list;
372 }
373
374 // We have had an asynchronous interrupt and found a potential PC but
375 // we're in a signal handler.
incrementCountAsynch(POLYCODEPTR pc)376 void incrementCountAsynch(POLYCODEPTR pc)
377 {
378 int q = incrAtomically(queuePtr);
379 if (q < PCQUEUESIZE) pcQueue[q] = pc;
380 }
381
382 // Called by the main thread to process the queue of PC values
processProfileQueue()383 void processProfileQueue()
384 {
385 if (queuePtr == 0) return;
386 while (1)
387 {
388 int q = queuePtr;
389 if (q >= PCQUEUESIZE)
390 incrAtomically(mainThreadCounts[MTP_USER_CODE]);
391 else addSynchronousCount(pcQueue[q], 1);
392 if (decrAtomically(queuePtr) == 0) break;
393 }
394 }
395
396 // Handle a SIGVTALRM or the simulated equivalent in Windows. This may be called
397 // at any time so we have to be careful. In particular in Linux this may be
398 // executed by a thread while holding a mutex so we must not do anything, such
399 // calling malloc, that could require locking.
handleProfileTrap(TaskData * taskData,SIGNALCONTEXT * context)400 void handleProfileTrap(TaskData *taskData, SIGNALCONTEXT *context)
401 {
402 if (singleThreadProfile != 0 && singleThreadProfile != taskData)
403 return;
404
405 /* If we are in the garbage-collector add the count to "gc_count"
406 otherwise try to find out where we are. */
407 if (mainThreadPhase == MTP_USER_CODE)
408 {
409 if (taskData == 0 || ! taskData->AddTimeProfileCount(context))
410 incrAtomically(mainThreadCounts[MTP_USER_CODE]);
411 // On Mac OS X all virtual timer interrupts seem to be directed to the root thread
412 // so all the counts will be "unknown".
413 }
414 else incrAtomically(mainThreadCounts[mainThreadPhase]);
415 }
416
417 // Called from the GC when allocation profiling is on.
AddObjectProfile(PolyObject * obj)418 void AddObjectProfile(PolyObject *obj)
419 {
420 ASSERT(obj->ContainsNormalLengthWord());
421 POLYUNSIGNED length = obj->Length();
422
423 if (obj->IsWordObject() && OBJ_HAS_PROFILE(obj->LengthWord()))
424 {
425 // It has a profile pointer. The last word should point to the
426 // closure or code of the allocating function. Add the size of this to the count.
427 ASSERT(length != 0);
428 PolyWord profWord = obj->Get(length-1);
429 ASSERT(profWord.IsDataPtr());
430 PolyObject *profObject = profWord.AsObjPtr();
431 ASSERT(profObject->IsMutable() && profObject->IsByteObject() && profObject->Length() == 1);
432 profObject->Set(0, PolyWord::FromUnsigned(profObject->Get(0).AsUnsigned() + length + 1));
433 }
434 // If it doesn't have a profile pointer add it to the appropriate count.
435 else if (obj->IsMutable())
436 {
437 if (obj->IsByteObject())
438 extraStoreCounts[EST_MUTABLEBYTE] += length+1;
439 else extraStoreCounts[EST_MUTABLE] += length+1;
440 }
441 else if (obj->IsCodeObject())
442 extraStoreCounts[EST_CODE] += length+1;
443 else if (obj->IsClosureObject())
444 {
445 ASSERT(0);
446 }
447 else if (obj->IsByteObject())
448 {
449 // Try to separate strings from other byte data. This is only
450 // approximate.
451 if (OBJ_IS_NEGATIVE(obj->LengthWord()))
452 extraStoreCounts[EST_BYTE] += length+1;
453 else
454 {
455 PolyStringObject *possString = (PolyStringObject*)obj;
456 POLYUNSIGNED bytes = length * sizeof(PolyWord);
457 // If the length of the string as given in the first word is sufficient
458 // to fit in the exact number of words then it's probably a string.
459 if (length >= 2 &&
460 possString->length <= bytes - sizeof(POLYUNSIGNED) &&
461 possString->length > bytes - 2 * sizeof(POLYUNSIGNED))
462 extraStoreCounts[EST_STRING] += length+1;
463 else
464 {
465 extraStoreCounts[EST_BYTE] += length+1;
466 }
467 }
468 }
469 else extraStoreCounts[EST_WORD] += length+1;
470 }
471
472 // Called from ML to control profiling.
profilerc(TaskData * taskData,Handle mode_handle)473 static Handle profilerc(TaskData *taskData, Handle mode_handle)
474 /* Profiler - generates statistical profiles of the code.
475 The parameter is an integer which determines the value to be profiled.
476 When profiler is called it always resets the profiling and prints out any
477 values which have been accumulated.
478 If the parameter is 0 this is all it does,
479 if the parameter is 1 then it produces time profiling,
480 if the parameter is 2 it produces store profiling.
481 3 - arbitrary precision emulation traps. */
482 {
483 unsigned mode = get_C_unsigned(taskData, mode_handle->Word());
484 {
485 // Create any strings we need. We only need to do this once but
486 // it must be done by a non-root thread since it needs a taskData object.
487 // Don't bother locking. At worst we'll create some garbage.
488 for (unsigned k = 0; k < MTP_MAXENTRY; k++)
489 {
490 if (psRTSString[k] == TAGGED(0))
491 psRTSString[k] = C_string_to_Poly(taskData, mainThreadText[k]);
492 }
493 for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
494 {
495 if (psExtraStrings[k] == TAGGED(0))
496 psExtraStrings[k] = C_string_to_Poly(taskData, extraStoreText[k]);
497 }
498 if (psGCTotal == TAGGED(0))
499 psGCTotal = C_string_to_Poly(taskData, "GARBAGE COLLECTION (total)");
500 }
501 // All these actions are performed by the root thread. Only profile
502 // printing needs to be performed with all the threads stopped but it's
503 // simpler to serialise all requests.
504 ProfileRequest request(mode, taskData);
505 processes->MakeRootRequest(taskData, &request);
506 if (request.errorMessage != 0) raise_exception_string(taskData, EXC_Fail, request.errorMessage);
507 return request.extractAsList(taskData);
508 }
509
PolyProfiling(FirstArgument threadId,PolyWord mode)510 POLYUNSIGNED PolyProfiling(FirstArgument threadId, PolyWord mode)
511 {
512 TaskData *taskData = TaskData::FindTaskForId(threadId);
513 ASSERT(taskData != 0);
514 taskData->PreRTSCall();
515 Handle reset = taskData->saveVec.mark();
516 Handle pushedMode = taskData->saveVec.push(mode);
517 Handle result = 0;
518
519 try {
520 result = profilerc(taskData, pushedMode);
521 } catch (...) { } // If an ML exception is raised
522
523 taskData->saveVec.reset(reset);
524 taskData->PostRTSCall();
525 if (result == 0) return TAGGED(0).AsUnsigned();
526 else return result->Word().AsUnsigned();
527 }
528
529 // This is called from the root thread when all the ML threads have been paused.
Perform()530 void ProfileRequest::Perform()
531 {
532 if (mode != kProfileOff && profileMode != kProfileOff)
533 {
534 // Profiling must be stopped first.
535 errorMessage = "Profiling is currently active";
536 return;
537 }
538
539 singleThreadProfile = 0; // Unless kProfileTimeThread is given this should be 0
540
541 switch (mode)
542 {
543 case kProfileOff:
544 // Turn off old profiling mechanism and print out accumulated results
545 profileMode = kProfileOff;
546 processes->StopProfiling();
547 getResults();
548 // Remove all the bitmaps to free up memory
549 gMem.RemoveProfilingBitmaps();
550 break;
551
552 case kProfileTimeThread:
553 singleThreadProfile = pCallingThread;
554 // And drop through to kProfileTime
555
556 case kProfileTime:
557 profileMode = kProfileTime;
558 processes->StartProfiling();
559 break;
560
561 case kProfileStoreAllocation:
562 profileMode = kProfileStoreAllocation;
563 break;
564
565 case kProfileEmulation:
566 profileMode = kProfileEmulation;
567 break;
568
569 case kProfileLiveData:
570 profileMode = kProfileLiveData;
571 break;
572
573 case kProfileLiveMutables:
574 profileMode = kProfileLiveMutables;
575 break;
576
577 case kProfileMutexContention:
578 profileMode = kProfileMutexContention;
579 break;
580
581 default: /* do nothing */
582 break;
583 }
584
585 }
586
587 struct _entrypts profilingEPT[] =
588 {
589 // Profiling
590 { "PolyProfiling", (polyRTSFunction)&PolyProfiling},
591
592 { NULL, NULL} // End of list.
593 };
594
595
596 class Profiling: public RtsModule
597 {
598 public:
599 virtual void Init(void);
600 virtual void GarbageCollect(ScanAddress *process);
601 };
602
603 // Declare this. It will be automatically added to the table.
604 static Profiling profileModule;
605
Init(void)606 void Profiling::Init(void)
607 {
608 // Reset profiling counts.
609 profileMode = kProfileOff;
610 for (unsigned k = 0; k < MTP_MAXENTRY; k++) mainThreadCounts[k] = 0;
611 }
612
GarbageCollect(ScanAddress * process)613 void Profiling::GarbageCollect(ScanAddress *process)
614 {
615 // Process any strings in the table.
616 for (unsigned k = 0; k < MTP_MAXENTRY; k++)
617 process->ScanRuntimeWord(&psRTSString[k]);
618 for (unsigned k = 0; k < EST_MAX_ENTRY; k++)
619 process->ScanRuntimeWord(&psExtraStrings[k]);
620 process->ScanRuntimeWord(&psGCTotal);
621 }
622