#!/usr/bin/awk -f
# Script to analyze experimental results of our branch prediction heuristics
# Contributed by Jan Hubicka, SuSE Inc.
# Copyright (C) 2001 Free Software Foundation, Inc.
#
# This file is part of GNU CC.
#
# GNU CC is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# GNU CC is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GNU CC; see the file COPYING.  If not, write to
# the Free Software Foundation, 59 Temple Place - Suite 330,
# Boston, MA 02111-1307, USA.
#
#
# This script is used to calculate two basic properties of the branch prediction
# heuristics - coverage and hitrate.  Coverage is number of executions of a given
# branch matched by the heuristics and hitrate is probability that once branch is
# predicted as taken it is really taken.
#
# These values are useful to determine the quality of given heuristics.  Hitrate
# may be directly used in predict.c.
#
# Usage:
#  Step 1: Compile and profile your program.  You need to use -fprofile-arcs
#    flag to get the profiles
#  Step 2: Generate log files.  The information about given heuristics are
#    saved into *.life dumps.  You need to pass the -df switch to the compiler
#    as well as -fbranch-probabilities to get the results of profiling noted
#    in the dumps.  Ensure that there are no "Arc profiling: some edge counts
#    were bad." warnings.
#  Step 3: Run this script to concatenate all *.life files:
#      analyze_brprob `find . -name '*.life'`
#    the information is collected and printed once all files are parsed.  This
#    may take a while.
#    Note that the script does use bc to perform long arithmetic.
#  Step 4: Read the results.  Basically the following table is printed:
#    (this is just an example from a very early stage of branch prediction pass
#     development, so please don't take these numbers seriously)
#
#HEURISTICS                  BRANCHES  (REL)         HITRATE     COVERAGE  (REL)
#opcode                          2889  83.7%  94.96%/ 97.62%      7516383  75.3%
#pointer                          246   7.1%  99.69%/ 99.86%       118791   1.2%
#loop header                      449  13.0%  98.32%/ 99.07%        43553   0.4%
#first match                     3450 100.0%  89.92%/ 97.27%      9979782 100.0%
#loop exit                        924  26.8%  88.95%/ 95.58%      9026266  90.4%
#error return                     150   4.3%  64.48%/ 86.81%       453542   4.5%
#call                             803  23.3%  51.66%/ 98.61%      3614037  36.2%
#loop branch                       51   1.5%  99.26%/ 99.27%        26854   0.3%
#noreturn call                    951  27.6% 100.00%/100.00%      1759809  17.6%
#
#  The heuristic called "first match" is a heuristic used by GCC branch
#  prediction pass and it predicts 89.92% branches correctly.
#
#  The quality of heuristics can be rated using both, coverage and hitrate
#  parameters.  For example "loop branch" heuristics (predicting loopback edge
#  as taken) have both very high hitrate and coverage, so it is very useful.
#  On the other hand, "exit block" heuristics (predicting exit edges as not
#  taken) have good hitrate, but poor coverage, so only 3 branches have been
#  predicted.  The "loop header" heuristic has problems, since it tends to
#  mispredict.
#
# The implementation of this script is somewhat brute force.  My awk skills
# are limited.

# Evaluate the arithmetic expression E with bc (scale = 2) and return the
# first line bc prints, as a string.  bc is used because the accumulated
# execution counts are handled as arbitrary-precision values.
#
# CMD and RES are local variables (extra parameters are the awk idiom for
# locals).  An empty expression, or a bc run that produces no output,
# yields 0 instead of leaking the result of an earlier call.
function longeval(e,    cmd, res)
{
  if (e == "")
    return 0
  cmd = "echo \"scale = 2 ;" e "\" | bc"
  res = 0
  # getline leaves RES untouched when the pipe yields nothing.
  cmd | getline res
  close (cmd)
  return res
}

# Return the heuristic name from dump line LINE: the text between the
# leading blanks and the first " heuristics: ".  All leading blanks are
# removed so the name is a stable array key however far the dump indents
# the line.
function heuristic_name(line)
{
  sub (/^ +/, "", line)
  sub (/ heuristics: .*.$/, "", line)
  return line
}

BEGIN {nnames = 0}

# Every "<name> heuristics: ..." line counts as one branch for <name>.
# The first time a name is seen it is appended to names[] (which fixes the
# output order) and its accumulators are initialized.
/^ .* heuristics: .*.$/ {
    name = heuristic_name($0)
    if (!(name in branches))
      {
        names[nnames] = name
        branches[name] = 0
        counts[name] = 0
        hits[name] = 0
        phits[name] = 0
        nnames++
      }
    branches[name] += 1
  }

# Lines that also carry profile data:
#   "<name> heuristics: <pred>% exec <count> hit <hit> (<...>)"
# counts[], hits[] and phits[] are kept as bc expressions (strings of
# "+"-joined terms), not awk numbers, and are only evaluated by bc.
/^ .* heuristics: .*. exec [0-9]* hit [0-9]* (.*.)$/ {
    name = heuristic_name($0)

    # Predicted probability: the text between "heuristics: " and " exec".
    pred = $0
    sub (/^ .* heuristics: /, "", pred)
    sub (/. exec [0-9]* hit [0-9]* (.*.)$/, "", pred)

    # Number of executions: the digits after "exec".
    count = $0
    sub (/^ .* heuristics: .*. exec /, "", count)
    sub (/ hit [0-9]* (.*.)$/, "", count)

    # Hit figure: the digits after "hit".
    hit = $0
    sub (/^ .* heuristics: .*. exec [0-9]* hit /, "", hit)
    sub (/ (.*.)$/, "", hit)

    # For predictions below 50% the hit figure is inverted to count - hit
    # (presumably because the dump reports how often the branch was taken
    # while the heuristic predicted "not taken" -- confirm against predict.c).
    if (int(pred) < 50.0)
      hit = count "-" hit

    counts[name] = counts[name] "+" count
    hits[name] = hits[name] "+" hit
    # phits accumulates the larger of hit and count - hit for this line.
    phits[name] = phits[name] "+((" hit ")<" count "/2)*(" count "-(" hit "))+((" hit ")>=" count "/2)*(" hit ")"

    # BC crashes on long strings.  Irritating.  Collapse an accumulator to
    # a single number once its expression grows past 2000 characters.
    if (length(counts[name]) > 2000)
      counts[name] = longeval(counts[name])
    if (length(hits[name]) > 2000)
      hits[name] = longeval(hits[name])
    if (length(phits[name]) > 2000)
      phits[name] = longeval(phits[name])
  }

# Print one table row per heuristic, in order of first appearance.
END {
    # Heuristics called combined predicts just everything, so its totals
    # are the 100% reference for both (REL) columns.
    if ("combined" in branches)
      {
        maxcounts = longeval(counts["combined"])
        maxbranches = branches["combined"]
      }
    else
      {
        # Without the reference row the relative columns cannot be
        # computed; report them as 0 rather than dividing by zero.
        maxcounts = 0
        maxbranches = 0
        print "analyze_brprob: no \"combined\" heuristics found in the input; (REL) columns are reported as 0" | "cat 1>&2"
        close ("cat 1>&2")
      }

    # Header widths mirror the row format below so the columns line up.
    printf ("%-27s %8s %6s %15s %12s %6s\n",
            "HEURISTICS", "BRANCHES", "(REL)", "HITRATE", "COVERAGE", "(REL)")

    for (i = 0; i < nnames; i++)
      {
        name = names[i]
        counts[name] = longeval(counts[name])

        if (maxbranches > 0)
          rel_branches = branches[name] * 100 / maxbranches
        else
          rel_branches = 0

        # The "-0.00001" keeps bc from dividing by zero when a count is 0.
        if (maxcounts + 0 > 0)
          rel_counts = longeval(counts[name] " * 100 / (" maxcounts "-0.00001)")
        else
          rel_counts = 0

        printf ("%-27s %8i %5.1f%% %6s%%/%6s%% %12s %5.1f%%\n",
                name,
                branches[name], rel_branches,
                longeval("(" hits[name] ") * 100 /(" counts[name] "-0.00001)"),
                longeval("(" phits[name] ") * 100 /(" counts[name] "-0.00001)"),
                counts[name], rel_counts)
      }
  }