1 /*
2  * Hatari - profiledsp.c
3  *
4  * Copyright (C) 2010-2015 by Eero Tamminen
5  *
6  * This file is distributed under the GNU General Public License, version 2
7  * or at your option any later version. Read the file gpl.txt for details.
8  *
9  * profiledsp.c - functions for profiling DSP and showing the results.
10  */
11 const char Profiledsp_fileid[] = "Hatari profiledsp.c : " __DATE__ " " __TIME__;
12 
13 #include <stdio.h>
14 #include <inttypes.h>
15 #include <limits.h>
16 #include <assert.h>
17 #include "main.h"
18 #include "configuration.h"
19 #include "clocks_timings.h"
20 #include "dsp.h"
21 #include "symbols.h"
22 #include "profile.h"
23 #include "profile_priv.h"
24 #include "debug_priv.h"
25 /* for VBL info */
26 #include "screen.h"
27 #include "video.h"
28 
/* DSP call tracking state: the call sites plus the previous PC, return PC
 * and call depth bookkeeping that collect_calls() reads and updates.
 */
static callinfo_t dsp_callinfo;
30 
/* Number of items in the profile array: one for each address that
 * a 16-bit DSP program counter can have (0x0-0xFFFF).
 */
#define DSP_PROFILE_ARR_SIZE 0x10000
/* Value at which the 64-bit per-address counters saturate; an item with
 * this value is reported as overflowed.  The constant is the unsigned
 * 64-bit maximum, so it carries an unsigned suffix instead of relying
 * on the type promotion rules for hexadecimal constants.
 */
#define MAX_DSP_PROFILE_VALUE 0xFFFFFFFFFFFFFFFFULL
33 
34 typedef struct {
35 	Uint64 count;		/* how many times this address is used */
36 	Uint64 cycles;		/* how many DSP cycles was taken at this address */
37 	Uint16 min_cycle;
38 	Uint16 max_cycle;
39 } dsp_profile_item_t;
40 
41 static struct {
42 	dsp_profile_item_t *data; /* profile data */
43 	profile_area_t ram;   /* statistics for whole memory */
44 	Uint16 *sort_arr;     /* data indexes used for sorting */
45 	Uint16 prev_pc;       /* previous PC for which the cycles are for */
46 	Uint16 loop_start;    /* address of last loop start */
47 	Uint16 loop_end;      /* address of last loop end */
48 	Uint32 loop_count;    /* how many times it was looped */
49 	Uint32 disasm_addr;   /* 'dspaddresses' command start address */
50 	bool processed;	      /* true when data is already processed */
51 	bool enabled;         /* true when profiling enabled */
52 } dsp_profile;
53 
54 
55 /* ------------------ DSP profile results ----------------- */
56 
57 /**
58  * Get DSP cycles, count and count percentage for given address.
59  * Return true if data was available and non-zero, false otherwise.
60  */
Profile_DspAddressData(Uint16 addr,float * percentage,Uint64 * count,Uint64 * cycles,Uint16 * cycle_diff)61 bool Profile_DspAddressData(Uint16 addr, float *percentage, Uint64 *count, Uint64 *cycles, Uint16 *cycle_diff)
62 {
63 	dsp_profile_item_t *item;
64 	if (!dsp_profile.data) {
65 		return false;
66 	}
67 	item = dsp_profile.data + addr;
68 
69 	*cycles = item->cycles;
70 	*count = item->count;
71 	if (item->max_cycle) {
72 		*cycle_diff = item->max_cycle - item->min_cycle;
73 	} else {
74 		*cycle_diff = 0;
75 	}
76 	if (dsp_profile.ram.counters.count) {
77 		*percentage = 100.0*(*count)/dsp_profile.ram.counters.count;
78 	} else {
79 		*percentage = 0.0;
80 	}
81 	return (*count > 0);
82 }
83 
84 /**
85  * show DSP specific profile statistics.
86  */
Profile_DspShowStats(void)87 void Profile_DspShowStats(void)
88 {
89 	profile_area_t *area = &dsp_profile.ram;
90 	fprintf(stderr, "DSP profile statistics (0x0-0xFFFF):\n");
91 	if (!area->active) {
92 		fprintf(stderr, "- no activity\n");
93 		return;
94 	}
95 	fprintf(stderr, "- active address range:\n  0x%04x-0x%04x\n",
96 		area->lowest, area->highest);
97 	fprintf(stderr, "- active instruction addresses:\n  %d\n",
98 		area->active);
99 	fprintf(stderr, "- executed instructions:\n  %"PRIu64"\n",
100 		area->counters.count);
101 	/* indicates either instruction(s) that address different memory areas
102 	 * (they can have different access costs), or more significantly,
103 	 * DSP code that has changed during profiling.
104 	 */
105 	fprintf(stderr, "- sum of per instruction cycle changes\n"
106 		"  (can indicate code change during profiling):\n  %"PRIu64"\n",
107 		area->counters.cycles_diffs);
108 
109 	fprintf(stderr, "- used cycles:\n  %"PRIu64"\n",
110 		area->counters.cycles);
111 	if (area->overflow) {
112 		fprintf(stderr, "  *** COUNTERS OVERFLOW! ***\n");
113 	}
114 	fprintf(stderr, "\n= %.5fs\n", (double)(area->counters.cycles) / MachineClocks.DSP_Freq);
115 }
116 
117 /**
118  * Show DSP instructions which execution was profiled, in the address order,
119  * starting from the given address.  Return next disassembly address.
120  */
Profile_DspShowAddresses(Uint32 addr,Uint32 upper,FILE * out,paging_t use_paging)121 Uint16 Profile_DspShowAddresses(Uint32 addr, Uint32 upper, FILE *out, paging_t use_paging)
122 {
123 	int show, shown, addrs, active;
124 	dsp_profile_item_t *data;
125 	Uint16 nextpc;
126 	Uint32 end;
127 	const char *symbol;
128 
129 	data = dsp_profile.data;
130 	if (!data) {
131 		fprintf(stderr, "ERROR: no DSP profiling data available!\n");
132 		return 0;
133 	}
134 
135 	end = DSP_PROFILE_ARR_SIZE;
136 	active = dsp_profile.ram.active;
137 	if (upper) {
138 		if (upper < end) {
139 			end = upper;
140 		}
141 	}
142 	show = INT_MAX;
143 	if (use_paging == PAGING_ENABLED) {
144 		show = DebugUI_GetPageLines(ConfigureParams.Debugger.nDisasmLines, 0);
145 		if (!show) {
146 			show = INT_MAX;
147 		}
148 	}
149 
150 	fputs("# disassembly with profile data: <instructions percentage>% (<sum of instructions>, <sum of cycles>, <max cycle difference>)\n", out);
151 	shown = 2; /* first and last printf */
152 
153 	addrs = nextpc = 0;
154 	for (; shown < show && addrs < active && addr < end; addr++) {
155 		if (!data[addr].count) {
156 			continue;
157 		}
158 		if (addr != nextpc && nextpc) {
159 			fputs("[...]\n", out);
160 			shown++;
161 		}
162 		symbol = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
163 		if (symbol) {
164 			fprintf(out, "%s:\n", symbol);
165 			shown++;
166 		}
167 		nextpc = DSP_DisasmAddress(out, addr, addr);
168 		addrs++;
169 		shown++;
170 	}
171 	if (addr < end) {
172 		printf("Disassembled %d (of active %d) DSP addresses.\n", addrs, active);
173 	} else {
174 		printf("Disassembled last %d (of active %d) DSP addresses, wrapping...\n", addrs, active);
175 		nextpc = 0;
176 	}
177 	return nextpc;
178 }
179 
180 /**
181  * compare function for qsort() to sort DSP profile data by descdending
182  * address cycles counts.
183  */
cmp_dsp_cycles(const void * p1,const void * p2)184 static int cmp_dsp_cycles(const void *p1, const void *p2)
185 {
186 	Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].cycles;
187 	Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].cycles;
188 	if (count1 > count2) {
189 		return -1;
190 	}
191 	if (count1 < count2) {
192 		return 1;
193 	}
194 	return 0;
195 }
196 
197 /**
198  * Sort DSP profile data addresses by cycle counts and show the results.
199  */
Profile_DspShowCycles(int show)200 void Profile_DspShowCycles(int show)
201 {
202 	int active;
203 	Uint16 *sort_arr, *end, addr;
204 	dsp_profile_item_t *data = dsp_profile.data;
205 	float percentage;
206 	Uint64 count;
207 
208 	if (!data) {
209 		fprintf(stderr, "ERROR: no DSP profiling data available!\n");
210 		return;
211 	}
212 
213 	active = dsp_profile.ram.active;
214 	sort_arr = dsp_profile.sort_arr;
215 	qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_cycles);
216 
217 	printf("addr:\tcycles:\n");
218 	show = (show < active ? show : active);
219 	for (end = sort_arr + show; sort_arr < end; sort_arr++) {
220 		addr = *sort_arr;
221 		count = data[addr].cycles;
222 		percentage = 100.0*count/dsp_profile.ram.counters.cycles;
223 		printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n", addr, percentage, count,
224 		       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
225 	}
226 	printf("%d DSP addresses listed.\n", show);
227 }
228 
229 
230 /**
231  * compare function for qsort() to sort DSP profile data by descdending
232  * address access counts.
233  */
cmp_dsp_count(const void * p1,const void * p2)234 static int cmp_dsp_count(const void *p1, const void *p2)
235 {
236 	Uint64 count1 = dsp_profile.data[*(const Uint16*)p1].count;
237 	Uint64 count2 = dsp_profile.data[*(const Uint16*)p2].count;
238 	if (count1 > count2) {
239 		return -1;
240 	}
241 	if (count1 < count2) {
242 		return 1;
243 	}
244 	return 0;
245 }
246 
247 /**
248  * Sort DSP profile data addresses by call counts and show the results.
249  * If symbols are requested and symbols are loaded, show (only) addresses
250  * matching a symbol.
251  */
Profile_DspShowCounts(int show,bool only_symbols)252 void Profile_DspShowCounts(int show, bool only_symbols)
253 {
254 	dsp_profile_item_t *data = dsp_profile.data;
255 	int symbols, matched, active;
256 	Uint16 *sort_arr, *end, addr;
257 	const char *name;
258 	float percentage;
259 	Uint64 count;
260 
261 	if (!data) {
262 		fprintf(stderr, "ERROR: no DSP profiling data available!\n");
263 		return;
264 	}
265 	active = dsp_profile.ram.active;
266 	show = (show < active ? show : active);
267 
268 	sort_arr = dsp_profile.sort_arr;
269 	qsort(sort_arr, active, sizeof(*sort_arr), cmp_dsp_count);
270 
271 	if (!only_symbols) {
272 		printf("addr:\tcount:\n");
273 		for (end = sort_arr + show; sort_arr < end; sort_arr++) {
274 			addr = *sort_arr;
275 			count = data[addr].count;
276 			percentage = 100.0*count/dsp_profile.ram.counters.count;
277 			printf("0x%04x\t%5.2f%%\t%"PRIu64"%s\n",
278 			       addr, percentage, count,
279 			       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
280 		}
281 		printf("%d DSP addresses listed.\n", show);
282 		return;
283 	}
284 
285 	symbols = Symbols_DspCodeCount();
286 	if (!symbols) {
287 		fprintf(stderr, "ERROR: no DSP symbols loaded!\n");
288 		return;
289 	}
290 	matched = 0;
291 
292 	printf("addr:\tcount:\t\tsymbol:\n");
293 	for (end = sort_arr + active; sort_arr < end; sort_arr++) {
294 
295 		addr = *sort_arr;
296 		name = Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
297 		if (!name) {
298 			continue;
299 		}
300 		count = data[addr].count;
301 		percentage = 100.0*count/dsp_profile.ram.counters.count;
302 		printf("0x%04x\t%.2f%%\t%"PRIu64"\t%s%s\n",
303 		       addr, percentage, count, name,
304 		       count == MAX_DSP_PROFILE_VALUE ? " (OVERFLOW)" : "");
305 
306 		matched++;
307 		if (matched >= show || matched >= symbols) {
308 			break;
309 		}
310 	}
311 	printf("%d DSP symbols listed.\n", matched);
312 }
313 
314 
/**
 * Address lookup callback given to Profile_ShowCallers(): store the
 * execution count of the given DSP address to 'total' and return what
 * Symbols_GetByDspAddress() gives as the text symbol for the address.
 */
static const char * addr2name(Uint32 addr, Uint64 *total)
{
	const dsp_profile_item_t *item = &dsp_profile.data[addr];

	*total = item->count;
	return Symbols_GetByDspAddress(addr, SYMTYPE_TEXT);
}
320 
321 /**
322  * Output DSP callers info to given file.
323  */
Profile_DspShowCallers(FILE * fp)324 void Profile_DspShowCallers(FILE *fp)
325 {
326 	Profile_ShowCallers(fp, dsp_callinfo.sites, dsp_callinfo.site, addr2name);
327 }
328 
329 /**
330  * Save DSP profile information to given file.
331  */
Profile_DspSave(FILE * out)332 void Profile_DspSave(FILE *out)
333 {
334 	/* Comma separated descriptions for the profile disassembly data fields.
335 	 * Instructions and cycles need to be first two fields!
336 	 */
337 	fputs("Field names:\tExecuted instructions, Used cycles, Largest cycle differences (= code changes during profiling)\n", out);
338 	/* (Python) pegexp that matches address and all describled fields from disassembly:
339 	 * <space>:<address> <opcodes> (<instr cycles>) <instr> <count>% (<count>, <cycles>)
340 	 * p:0202  0aa980 000200  (07 cyc)  jclr #0,x:$ffe9,p:$0200  0.00% (6, 42)
341 	 */
342 	fputs("Field regexp:\t^p:([0-9a-f]+) .*% \\((.*)\\)$\n", out);
343 	Profile_DspShowAddresses(0, DSP_PROFILE_ARR_SIZE, out, PAGING_DISABLED);
344 	Profile_DspShowCallers(out);
345 }
346 
347 /* ------------------ DSP profile control ----------------- */
348 
349 /**
350  * Initialize DSP profiling when necessary.  Return true if profiling.
351  */
Profile_DspStart(void)352 bool Profile_DspStart(void)
353 {
354 	dsp_profile_item_t *item;
355 	int i;
356 
357 	Profile_FreeCallinfo(&(dsp_callinfo));
358 	if (dsp_profile.sort_arr) {
359 		/* remove previous results */
360 		free(dsp_profile.sort_arr);
361 		free(dsp_profile.data);
362 		dsp_profile.sort_arr = NULL;
363 		dsp_profile.data = NULL;
364 		printf("Freed previous DSP profile buffers.\n");
365 	}
366 	if (!dsp_profile.enabled) {
367 		return false;
368 	}
369 	/* zero everything */
370 	memset(&dsp_profile, 0, sizeof(dsp_profile));
371 
372 	dsp_profile.data = calloc(DSP_PROFILE_ARR_SIZE, sizeof(*dsp_profile.data));
373 	if (!dsp_profile.data) {
374 		perror("ERROR, new DSP profile buffer alloc failed");
375 		return false;
376 	}
377 	printf("Allocated DSP profile buffer (%d KB).\n",
378 	       (int)sizeof(*dsp_profile.data)*DSP_PROFILE_ARR_SIZE/1024);
379 
380 	Profile_AllocCallinfo(&(dsp_callinfo), Symbols_DspCodeCount(), "DSP");
381 
382 	item = dsp_profile.data;
383 	for (i = 0; i < DSP_PROFILE_ARR_SIZE; i++, item++) {
384 		item->min_cycle = 0xFFFF;
385 	}
386 	dsp_profile.prev_pc = DSP_GetPC();
387 
388 	dsp_profile.loop_start = 0xFFFF;
389 	dsp_profile.loop_end = 0xFFFF;
390 	dsp_profile.loop_count = 0;
391 	Profile_LoopReset();
392 
393 	dsp_profile.disasm_addr = 0;
394 	dsp_profile.processed = false;
395 	dsp_profile.enabled = true;
396 	return dsp_profile.enabled;
397 }
398 
399 /* return true if pc is next instruction for previous pc */
is_prev_instr(Uint16 prev_pc,Uint16 pc)400 static bool is_prev_instr(Uint16 prev_pc, Uint16 pc)
401 {
402 	/* just moved to next instruction (1-2 words)? */
403 	if (prev_pc < pc && (pc - prev_pc) <= 4) {
404 		return true;
405 	}
406 	return false;
407 }
408 
409 /* return branch type based on caller instruction type */
dsp_opcode_type(Uint16 prev_pc,Uint16 pc)410 static calltype_t dsp_opcode_type(Uint16 prev_pc, Uint16 pc)
411 {
412 	const char *dummy;
413 	Uint32 opcode;
414 
415 	/* 24-bit instruction opcode */
416 	opcode = DSP_ReadMemory(prev_pc, 'P', &dummy) & 0xFFFFFF;
417 
418 	/* subroutine returns */
419 	if (opcode == 0xC) {	/* (just) RTS */
420 		return CALL_SUBRETURN;
421 	}
422 	/* unconditional subroutine calls */
423 	if ((opcode & 0xFFF000) == 0xD0000 ||	/* JSR   00001101 0000aaaa aaaaaaaa */
424 	    (opcode & 0xFFC0FF) == 0xBC080) {	/* JSR   00001011 11MMMRRR 10000000 */
425 		return CALL_SUBROUTINE;
426 	}
427 	/* conditional subroutine calls */
428 	if ((opcode & 0xFF0000) == 0xF0000 ||	/* JSCC  00001111 CCCCaaaa aaaaaaaa */
429 	    (opcode & 0xFFC0F0) == 0xBC0A0 ||	/* JSCC  00001011 11MMMRRR 1010CCCC */
430 	    (opcode & 0xFFC0A0) == 0xB4080 ||	/* JSCLR 00001011 01MMMRRR 1S0bbbbb */
431 	    (opcode & 0xFFC0A0) == 0xB0080 ||	/* JSCLR 00001011 00aaaaaa 1S0bbbbb */
432 	    (opcode & 0xFFC0A0) == 0xB8080 ||	/* JSCLR 00001011 10pppppp 1S0bbbbb */
433 	    (opcode & 0xFFC0E0) == 0xBC000 ||	/* JSCLR 00001011 11DDDDDD 000bbbbb */
434 	    (opcode & 0xFFC0A0) == 0xB40A0 ||	/* JSSET 00001011 01MMMRRR 1S1bbbbb */
435 	    (opcode & 0xFFC0A0) == 0xB00A0 ||	/* JSSET 00001011 00aaaaaa 1S1bbbbb */
436 	    (opcode & 0xFFC0A0) == 0xB80A0 ||	/* JSSET 00001011 10pppppp 1S1bbbbb */
437 	    (opcode & 0xFFC0E0) == 0xBC020) {	/* JSSET 00001011 11DDDDDD 001bbbbb */
438 		/* hopefully fairly safe heuristic:
439 		 * if previously executed instruction
440 		 * was one before current one, no
441 		 * subroutine call was made to next
442 		 * instruction, the condition just
443 		 * wasn't met.
444 		 */
445 		if (is_prev_instr(prev_pc, pc)) {
446 			return CALL_NEXT;
447 		}
448 		return CALL_SUBROUTINE;
449 	}
450 	/* exception handler returns */
451 	if (opcode == 0x4) {	/* (just) RTI */
452 		return CALL_EXCRETURN;
453 	}
454 
455 	/* Besides CALL_UNKNOWN, rest isn't used by subroutine call
456 	 * cost collection.  However, it's useful info when debugging
457 	 * code or reading full callgraphs (because optimized code uses
458 	 * also jumps/branches for subroutine calls).
459 	 */
460 
461 	/* TODO: exception invocation.
462 	 * Could be detected by PC going through low interrupt vector adresses,
463 	 * but fast-calls using JSR/RTS would need separate handling.
464 	 */
465 	if (0) {	/* TODO */
466 		return CALL_EXCEPTION;
467 	}
468 	/* branches */
469 	if ((opcode & 0xFFF000) == 0xC0000 ||	/* JMP  00001100 0000aaaa aaaaaaaa */
470 	    (opcode & 0xFFC0FF) == 0xAC080 ||	/* JMP  00001010 11MMMRRR 10000000 */
471 	    (opcode & 0xFF0000) == 0xE0000 ||	/* JCC  00001110 CCCCaaaa aaaaaaaa */
472 	    (opcode & 0xFFC0F0) == 0xAC0A0 ||	/* JCC  00001010 11MMMRRR 1010CCCC */
473 	    (opcode & 0xFFC0A0) == 0xA8080 ||	/* JCLR 00001010 10pppppp 1S0bbbbb */
474 	    (opcode & 0xFFC0A0) == 0xA4080 ||	/* JCLR 00001010 01MMMRRR 1S0bbbbb */
475 	    (opcode & 0xFFC0A0) == 0xA0080 ||	/* JCLR 00001010 00aaaaaa 1S0bbbbb */
476 	    (opcode & 0xFFC0E0) == 0xAC000 ||	/* JCLR 00001010 11dddddd 000bbbbb */
477 	    (opcode & 0xFFC0A0) == 0xA80A0 ||	/* JSET 00001010 10pppppp 1S1bbbbb */
478 	    (opcode & 0xFFC0A0) == 0xA40A0 ||	/* JSET 00001010 01MMMRRR 1S1bbbbb */
479 	    (opcode & 0xFFC0A0) == 0xA00A0 ||	/* JSET 00001010 00aaaaaa 1S1bbbbb */
480 	    (opcode & 0xFFC0E0) == 0xAC020 ||	/* JSET 00001010 11dddddd 001bbbbb */
481 	    (opcode & 0xFF00F0) == 0x600A0 ||	/* REP  00000110 iiiiiiii 1010hhhh */
482 	    (opcode & 0xFFC0FF) == 0x6C020 ||	/* REP  00000110 11dddddd 00100000 */
483 	    (opcode & 0xFFC0BF) == 0x64020 ||	/* REP  00000110 01MMMRRR 0s100000 */
484 	    (opcode & 0xFFC0BF) == 0x60020 ||	/* REP  00000110 00aaaaaa 0s100000 */
485 	    (opcode & 0xFF00F0) == 0x60080 ||	/* DO/ENDO 00000110 iiiiiiii 1000hhhh */
486 	    (opcode & 0xFFC0FF) == 0x6C000 ||	/* DO/ENDO 00000110 11DDDDDD 00000000 */
487 	    (opcode & 0xFFC0BF) == 0x64000 ||	/* DO/ENDO 00000110 01MMMRRR 0S000000 */
488 	    (opcode & 0xFFC0BF) == 0x60000) {	/* DO/ENDO 00000110 00aaaaaa 0S000000 */
489 		return CALL_BRANCH;
490 	}
491 	if (is_prev_instr(prev_pc, pc)) {
492 		return CALL_NEXT;
493 	}
494 	return CALL_UNKNOWN;
495 }
496 
497 /**
498  * If call tracking is enabled (there are symbols), collect
499  * information about subroutine and other calls, and their costs.
500  *
501  * Like with profile data, caller info checks need to be for previous
502  * instruction, that's why "pc" argument for this function actually
503  * needs to be previous PC.
504  */
collect_calls(Uint16 pc,counters_t * counters)505 static void collect_calls(Uint16 pc, counters_t *counters)
506 {
507 	calltype_t flag;
508 	Uint16 prev_pc;
509 	Uint32 caller_pc;
510 	int idx;
511 
512 	prev_pc = dsp_callinfo.prev_pc;
513 	dsp_callinfo.prev_pc = pc;
514 	caller_pc = PC_UNDEFINED;
515 
516 	/* address is return address for last subroutine call? */
517 	if (unlikely(pc == dsp_callinfo.return_pc) && likely(dsp_callinfo.depth)) {
518 
519 		flag = dsp_opcode_type(prev_pc, pc);
520 		/* return address is entered either by subroutine return,
521 		 * or by returning from exception that interrupted
522 		 * the instruction at return address.
523 		 */
524 		if (likely(flag == CALL_SUBRETURN || flag == CALL_EXCRETURN)) {
525 			caller_pc = Profile_CallEnd(&dsp_callinfo, counters);
526 		}
527 	}
528 
529 	/* address is one which we're tracking? */
530 	idx = Symbols_GetDspCodeIndex(pc);
531 	if (unlikely(idx >= 0)) {
532 
533 		flag = dsp_opcode_type(prev_pc, pc);
534 		if (flag == CALL_SUBROUTINE) {
535 			dsp_callinfo.return_pc = DSP_GetNextPC(prev_pc);  /* slow! */
536 		} else if (caller_pc != PC_UNDEFINED) {
537 			/* returned from function, change return
538 			 * instruction address to address of
539 			 * what did the returned call.
540 			 */
541 			prev_pc = caller_pc;
542 			assert(is_prev_instr(prev_pc, pc));
543 			flag = CALL_NEXT;
544 		}
545 		Profile_CallStart(idx, &dsp_callinfo, prev_pc, flag, pc, counters);
546 
547 	}
548 }
549 
550 /**
551  * log last loop info, if there's suitable data for one
552  */
log_last_loop(void)553 static void log_last_loop(void)
554 {
555 	unsigned len = dsp_profile.loop_end - dsp_profile.loop_start;
556 	if (dsp_profile.loop_count > 1 && (len < profile_loop.dsp_limit || !profile_loop.dsp_limit)) {
557 		fprintf(profile_loop.fp, "DSP %d 0x%04x %d %d\n", nVBLs,
558 			dsp_profile.loop_start, len, dsp_profile.loop_count);
559 		fflush(profile_loop.fp);
560 	}
561 }
562 
563 /**
564  * Update DSP cycle and count statistics for PC address.
565  *
566  * This is called after instruction is executed and PC points
567  * to next instruction i.e. info is for previous PC address.
568  */
Profile_DspUpdate(void)569 void Profile_DspUpdate(void)
570 {
571 	dsp_profile_item_t *prev;
572 	Uint16 pc, prev_pc, cycles;
573 	counters_t *counters;
574 
575 	prev_pc = dsp_profile.prev_pc;
576 	dsp_profile.prev_pc = pc = DSP_GetPC();
577 
578 	if (unlikely(profile_loop.fp)) {
579 		if (pc < prev_pc) {
580 			if (pc == dsp_profile.loop_start && prev_pc == dsp_profile.loop_end) {
581 				dsp_profile.loop_count++;
582 			} else {
583 				dsp_profile.loop_start = pc;
584 				dsp_profile.loop_end = prev_pc;
585 				dsp_profile.loop_count = 1;
586 			}
587 		} else {
588 			if (pc > dsp_profile.loop_end) {
589 				log_last_loop();
590 				dsp_profile.loop_end = 0xFFFF;
591 				dsp_profile.loop_count = 0;
592 			}
593 		}
594 	}
595 
596 	prev = dsp_profile.data + prev_pc;
597 
598 	if (likely(prev->count < MAX_DSP_PROFILE_VALUE)) {
599 		prev->count++;
600 	}
601 
602 	cycles = DSP_GetInstrCycles();
603 	if (likely(prev->cycles < MAX_DSP_PROFILE_VALUE - cycles)) {
604 		prev->cycles += cycles;
605 	} else {
606 		prev->cycles = MAX_DSP_PROFILE_VALUE;
607 	}
608 
609 	if (unlikely(cycles < prev->min_cycle)) {
610 		prev->min_cycle = cycles;
611 	}
612 	if (unlikely(cycles > prev->max_cycle)) {
613 		prev->max_cycle = cycles;
614 	}
615 
616 	counters = &(dsp_profile.ram.counters);
617 	if (dsp_callinfo.sites) {
618 		collect_calls(prev_pc, counters);
619 	}
620 	/* counters are increased after caller info is processed,
621 	 * otherwise cost for the instruction calling the callee
622 	 * doesn't get accounted to caller (but callee).
623 	 */
624 	counters->cycles += cycles;
625 	counters->count++;
626 }
627 
628 /**
629  * Helper for collecting DSP profile area statistics.
630  */
update_area_item(profile_area_t * area,Uint16 addr,dsp_profile_item_t * item)631 static void update_area_item(profile_area_t *area, Uint16 addr, dsp_profile_item_t *item)
632 {
633 	Uint64 cycles = item->cycles;
634 	Uint64 count = item->count;
635 	Uint16 diff;
636 
637 	if (!count) {
638 		return;
639 	}
640 	if (cycles == MAX_DSP_PROFILE_VALUE) {
641 		area->overflow = true;
642 	}
643 	if (item->max_cycle) {
644 		diff = item->max_cycle - item->min_cycle;
645 	} else {
646 		diff = 0;
647 	}
648 
649 	area->counters.count += count;
650 	area->counters.cycles += cycles;
651 	area->counters.cycles_diffs += diff;
652 
653 	if (addr < area->lowest) {
654 		area->lowest = addr;
655 	}
656 	area->highest = addr;
657 
658 	area->active++;
659 }
660 
661 /**
662  * Stop and process the DSP profiling data; collect stats and
663  * prepare for more optimal sorting.
664  */
Profile_DspStop(void)665 void Profile_DspStop(void)
666 {
667 	dsp_profile_item_t *item;
668 	profile_area_t *area;
669 	Uint16 *sort_arr;
670 	Uint32 addr;
671 
672 	if (dsp_profile.processed || !dsp_profile.enabled) {
673 		return;
674 	}
675 
676 	log_last_loop();
677 	if (profile_loop.fp) {
678 		fflush(profile_loop.fp);
679 	}
680 
681 	Profile_FinalizeCalls(&(dsp_callinfo), &(dsp_profile.ram.counters), Symbols_GetByDspAddress);
682 
683 	/* find lowest and highest  addresses executed */
684 	area = &dsp_profile.ram;
685 	memset(area, 0, sizeof(profile_area_t));
686 	area->lowest = DSP_PROFILE_ARR_SIZE;
687 
688 	item = dsp_profile.data;
689 	for (addr = 0; addr < DSP_PROFILE_ARR_SIZE; addr++, item++) {
690 		update_area_item(area, addr, item);
691 	}
692 
693 	/* allocate address array for sorting */
694 	sort_arr = calloc(dsp_profile.ram.active, sizeof(*sort_arr));
695 
696 	if (!sort_arr) {
697 		perror("ERROR: allocating DSP profile address data");
698 		free(dsp_profile.data);
699 		dsp_profile.data = NULL;
700 		return;
701 	}
702 	printf("Allocated DSP profile address buffer (%d KB).\n",
703 	       (int)sizeof(*sort_arr)*(dsp_profile.ram.active+512)/1024);
704 	dsp_profile.sort_arr = sort_arr;
705 
706 	/* ...and fill addresses for used instructions... */
707 	area = &dsp_profile.ram;
708 	item = &(dsp_profile.data[area->lowest]);
709 	for (addr = area->lowest; addr <= area->highest; addr++, item++) {
710 		if (item->count) {
711 			*sort_arr++ = addr;
712 		}
713 	}
714 	//printf("%d/%d/%d\n", area->active, sort_arr-dsp_profile.sort_arr, active);
715 
716 	Profile_DspShowStats();
717 	dsp_profile.processed = true;
718 }
719 
720 /**
721  * Get pointers to DSP profile enabling and disasm address variables
722  * for updating them (in parser).
723  */
Profile_DspGetPointers(bool ** enabled,Uint32 ** disasm_addr)724 void Profile_DspGetPointers(bool **enabled, Uint32 **disasm_addr)
725 {
726 	*disasm_addr = &dsp_profile.disasm_addr;
727 	*enabled = &dsp_profile.enabled;
728 }
729 
730 /**
731  * Get callinfo & symbol search pointers for stack walking.
732  */
Profile_DspGetCallinfo(callinfo_t ** callinfo,const char * (** get_symbol)(Uint32,symtype_t))733 void Profile_DspGetCallinfo(callinfo_t **callinfo, const char* (**get_symbol)(Uint32, symtype_t))
734 {
735 	*callinfo = &(dsp_callinfo);
736 	*get_symbol = Symbols_GetByDspAddress;
737 }
738