#!/usr/bin/env perl
#
# Takes a profile file as input and prints out annotated disassembly.
# Usage:
#   ./annotate_profile.pl <binary_name> <profile_name>
#
# Draws a fixed-width text bar made of 'X' marks followed by space padding.
#
# Parameters:
#   $length - total width of the bar in characters (a fractional width is
#             rounded up)
#   $filled - how much of the bar is filled; may be fractional
#
# Returns the bar as a string.
#
# Cell k (counting from 1) is marked only when $filled is strictly greater
# than k, so DRAW_BAR(20, 20) yields 19 marks and one space, and
# DRAW_BAR(20, 1.5) yields a single mark. The number of marks is capped at
# the bar width, so an out-of-range $filled can never produce a bar that is
# wider than $length and breaks the column layout of the report.
sub DRAW_BAR($$) {
    my ($length, $filled) = @_;

    # Width of the bar: ceil($length), never negative.
    my $width = int($length);
    $width++ if $width < $length;
    $width = 0 if $width < 0;

    # Number of marks: ceil($filled - 1), clamped to [0, $width].
    my $marks = int($filled - 1);
    $marks++ if $marks < $filled - 1;
    $marks = 0      if $marks < 0;
    $marks = $width if $marks > $width;

    return ('X' x $marks) . (' ' x ($width - $marks));
}
# File-level state shared by the subroutines below.

# Not referenced by the visible subroutines: ReadSamples declares its own
# lexicals with the same names.
my ($curFunc, $curModule) = ("", "");

# Sum of the hit counts of all samples read from the profile.
my $allHits = 0;

my (
    %moduleHits,        # module path  -> accumulated hit count
    %funcModule,        # function key -> module (binary file) it belongs to
    %funcHits,          # function key -> accumulated hit count
    %funcHottestCount,  # function key -> largest hit count of a single address
    %funcStart,         # function key -> first address to disassemble
    %funcEnd,           # function key -> address of the last sample seen
    %funcNames,         # function key -> symbol name
    %funcBaseAddrs,     # function key -> base address
    %funcSizes,         # function key -> size (largest one reported)
    %addrHits,          # sampled address -> hit count
    %addrFunc,          # sampled address -> function key
    %moduleBaseAddr,    # module path  -> base address taken from the profile
    %demangledNames,    # symbol name  -> demangled name (lookup cache)
    %srcLineHits,       # source line label -> accumulated hit count
    %srcFileHits,       # source file       -> accumulated hit count
);

# Function keys ordered by base address; rebuilt by ReadFunctionList.
my @funcSortByAddr;
# Demangles a C++ function name.
#
# Parameters:
#   $name - symbol name, mangled or not
#
# Returns the demangled name produced by "c++filt -p" for names that start
# with "_Z"; any other name, and any name for which c++filt prints nothing,
# is returned unchanged. Dies with "No c++filt" if c++filt cannot be started.
#
# Results are remembered in %demangledNames so that c++filt is started at
# most once per distinct name. c++filt is run through the list form of a pipe
# open, so the symbol name is passed as a plain argument and never
# interpreted by a shell.
sub DEMANGLE($) {
    my ($name) = @_;
    return $name unless defined $name;
    return $demangledNames{$name} if exists $demangledNames{$name};

    my $result = $name;
    if ($name =~ /^_Z/) {
        open(my $RES, '-|', 'c++filt', '-p', $name) or die "No c++filt";
        my $demangled_name = <$RES>;
        close $RES;
        if (defined $demangled_name) {
            chomp($demangled_name);
            $result = $demangled_name if length($demangled_name) != 0;
        }
    }

    $demangledNames{$name} = $result;
    return $result;
}
# Records one function in the per-function tables.
#
# Parameters:
#   $func     - key under which the function is stored
#   $bin_file - module (binary file) the function belongs to
#   $baseAddr - base address of the function
#   $size     - size of the function
#   $name     - symbol name
#
# Module, base address, name and start address are overwritten on every
# call. The size is only ever raised: the same function may be reported
# several times with different sizes (zero and non-zero), and the largest
# one wins.
sub AddFunc($$$$$)
{
    my ($func, $bin_file, $baseAddr, $size, $name) = @_;

    my $needs_size = !exists $funcSizes{$func};
    $needs_size = 1 if !$needs_size && $funcSizes{$func} < $size;
    $funcSizes{$func} = $size if $needs_size;

    $funcNames{$func}     = $name;
    $funcBaseAddrs{$func} = $baseAddr;
    $funcModule{$func}    = $bin_file;
    $funcStart{$func}     = $func;
}
# Reads the list of all functions in a module.
#
# Parameters:
#   $bin_file       - path of the module (binary file) to inspect
#   $moduleBaseAddr - value added to every symbol address found in the module
#
# Runs "readelf -W -s" on the module and registers every FUNC symbol with
# DEFAULT visibility and a numeric section index through AddFunc, then
# rebuilds @funcSortByAddr. Returns without doing anything if the module
# does not exist. Dies if readelf cannot be started.
#
# readelf is run through the list form of a pipe open, so a module path
# containing spaces or shell metacharacters is passed through verbatim
# instead of being interpreted by a shell.
sub ReadFunctionList($$) {
    my ($bin_file, $moduleBaseAddr) = @_;
    return if !defined($bin_file) || !-e $bin_file;

    open(my $IN_FILE, '-|', 'readelf', '-W', '-s', $bin_file)
        or die "couldn't open the file! (readelf -W -s $bin_file: $!)";
    while (my $line = <$IN_FILE>) {
        chomp($line);
        # "    33: 00000000000a0fc0   433 FUNC    GLOBAL DEFAULT   10 getipnodebyaddr@@FBSD_1.0"
        next unless $line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/;
        my ($value, $size, $name) = ($1, $2, $3);
        my $baseAddr = hex($value) + $moduleBaseAddr;
        # The base address doubles as the key of the function.
        AddFunc($baseAddr, $bin_file, $baseAddr, $size, $name);
    }
    close($IN_FILE);

    @funcSortByAddr = sort { $funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs;
}
# Reads the profile named by $ARGV[1] and attributes address hits to functions.
#
# Input recognized by the patterns below:
#   * a "samples: N unique: N dropped: N searchskips: N" summary line;
#   * a "Samples:" line, which starts the sample section; every other line
#     seen before it is echoed to the output unchanged;
#   * tab-separated "Func" lines such as
#       "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
#     of which the third field is used as the module base, the fourth as the
#     function address and the fifth as the module path;
#   * tab-separated sample lines "<number> 0x<address> <hit count>".
#
# Fills the file-level hit tables (%funcHits, %funcHottestCount, %addrHits,
# %addrFunc, %funcStart, %funcEnd, %moduleHits, $allHits), loads the function
# list of every module on first sight, and finally prints the summary
# counters, with a warning when the number of sample lines read differs from
# the "unique" count of the summary line.
#
# Dies if the profile cannot be opened. The profile is opened with the
# three-argument form of open, so its name is always treated as a plain file
# name.
sub ReadSamples() {
    my $samples_file = $ARGV[1];
    defined $samples_file
        or die "couldn't open the file! (no profile file given)";
    open(my $IN_FILE, '<', $samples_file)
        or die "couldn't open the file! ($samples_file: $!)";

    my $curFuncInd     = 0;     # scan position in @funcSortByAddr
    my $curFunc        = 0;     # function the next sample is attributed to
    my $curFuncEnd     = 0;     # end address of the last function found by the scan
    my $curModule      = "";
    my $curModuleBase  = 0;
    my $read_samples   = 0;
    my $samplesStarted = 0;

    while (my $line = <$IN_FILE>) {
        chomp($line);

        # The summary counters are kept as package variables, as before.
        if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) {
            ($total_samples, $unique_samples, $dropped_samples, $search_skips) = ($1, $2, $3, $4);
            next;
        }
        if ($line =~ m/^Samples:$/) {
            $samplesStarted = 1;
            next;
        }
        if (!$samplesStarted) {
            print "$line\n";
            next;
        }

        if ($line =~ m/^Func\t\d+/) {
            # "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
            my @cols = split(/\t/, $line);
            $curModule     = $cols[4];
            $curModuleBase = hex($cols[2]);
            # A module base of 0x400000 is treated as "no offset".
            # NOTE(review): presumably the load address of a non-relocatable
            # executable whose symbol values are already absolute - confirm.
            $curModuleBase = 0 if 0x400000 == $curModuleBase;
            $curFunc = hex($cols[3]);

            if (!exists $moduleBaseAddr{$curModule}) {
                $moduleBaseAddr{$curModule} = $curModuleBase;
                ReadFunctionList($curModule, $curModuleBase);
                # ReadFunctionList rebuilds @funcSortByAddr, so the old scan
                # position and the cached function end no longer describe the
                # array; restart the scan from the beginning.
                $curFuncInd = 0;
                $curFuncEnd = 0;
            }
            if (!exists $funcNames{$curFunc}) {
                my $name = sprintf("unknown_0x%08x", $curFunc);
                AddFunc($curFunc, $curModule, $curFunc, 0, $name);
            }
        } elsif ($line =~ m/^\d+\t0x([0-9a-fA-F]+)\t(\d+)/) {
            # One sample for the current function.
            my ($addr, $hits) = (hex($1), $2);
            $read_samples++;

            if ($addr >= $curFuncEnd) {
                # Scan forward for the function the address belongs to. If no
                # function contains it, the function named by the last "Func"
                # line stays current.
                while ($curFuncInd <= $#funcSortByAddr) {
                    my $f     = $funcSortByAddr[$curFuncInd];
                    my $begin = $funcBaseAddrs{$f};
                    my $end   = $begin + $funcSizes{$f};
                    if ($begin <= $addr and $addr < $end) {
                        $curFunc             = $f;
                        $funcStart{$curFunc} = $addr;
                        $curFuncEnd          = $end;
                        last;
                    } elsif ($addr < $begin) {
                        last;
                    }
                    ++$curFuncInd;
                }
            }

            $funcHits{$curFunc} += $hits;
            if (($funcHottestCount{$curFunc} || 0) < $hits) {
                $funcHottestCount{$curFunc} = $hits;
            }
            $addrHits{$addr}        = $hits;
            $addrFunc{$addr}        = $curFunc;
            $funcEnd{$curFunc}      = $addr;
            $allHits               += $hits;
            $moduleHits{$curModule} += $hits;
        }
    }
    close($IN_FILE);

    printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips;
    if ($read_samples != $unique_samples) {
        printf "\n-----------------------------------------------------------------------------------------------------\n";
        printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples;
        printf "\n-----------------------------------------------------------------------------------------------------\n";
    }
}
# Dumps module stats: one line per module with its hit count, its share of
# all hits and a bar, hottest module first.
#
# Modules are ordered by hit count (descending) with the module path as a
# tie-breaker. The previous numeric comparison of the module paths themselves
# did not order anything. Prints nothing when no hits were recorded, which
# also avoids dividing by zero.
sub DumpModules() {
    return if !$allHits;

    my @modules = sort { $moduleHits{$b} <=> $moduleHits{$a} or $a cmp $b } keys %moduleHits;
    for my $m (@modules) {
        my $cnt  = $moduleHits{$m};
        my $perc = 100.0 * $cnt / $allHits;
        printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m;
    }
}
# Dumps the top N hot functions.
#
# Parameters:
#   $maxCnt - maximum number of functions to print
#
# Functions are ranked by accumulated hit count, hottest first. Each line
# shows the hit count, the share of all hits, a bar and the demangled name.
sub DumpHotFunc($) {
    my ($maxCnt) = @_;

    my @ranked = sort { $funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
    my $printed = 0;
    foreach my $func (@ranked) {
        last unless $printed < $maxCnt;
        $printed++;

        my $hits = $funcHits{$func};
        my $share = 100.0 * $hits / $allHits;
        printf "%12d\t%6.2f%% |%s %s\n", $hits, $share, DRAW_BAR(20, 20*$hits/$allHits), DEMANGLE($funcNames{$func});
    }
}
# Dumps the top N hotspots (hot addresses).
#
# Parameters:
#   $maxCnt - maximum number of addresses to print
#
# Addresses are ranked by hit count, hottest first. Each line shows the hit
# count, the share of all hits, a bar, the address, and the demangled name of
# the owning function plus the offset of the address from its base.
sub DumpHotSpots($) {
    my ($maxCnt) = @_;

    my @ranked = sort { $addrHits{$b} <=> $addrHits{$a} } keys %addrHits;
    my $printed = 0;
    foreach my $spot (@ranked) {
        last unless $printed < $maxCnt;
        $printed++;

        my $hits  = $addrHits{$spot};
        my $share = 100.0 * $hits / $allHits;
        my $owner = $addrFunc{$spot};
        my $label = $funcNames{$owner};
        printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n",
            $hits, $share, DRAW_BAR(20, 20*$hits/$allHits), $spot, DEMANGLE($label), $spot - $funcBaseAddrs{$owner};
    }
}
# Prints one disassembly line annotated with hit information.
#
# Parameters:
#   $address        - address of the instruction, as used in %addrHits
#   $disassm        - disassembly text of the instruction
#   $max_hit_count  - hit count that corresponds to a full bar
#   $func_hit_count - hit count that corresponds to 100%
#
# The line consists of the address, the hit count of the address (empty when
# the address was never sampled), its percentage of $func_hit_count, a bar
# scaled against $max_hit_count, and the disassembly text.
sub ANNOTATE_DISASSM($$$$) {
    my ($address, $disassm, $max_hit_count, $func_hit_count) = @_;

    my $hits = $addrHits{$address};
    printf "% 8x %s\t% 7.2f%% |%s\t%s\n",
        $address,
        $hits,
        100*$hits/$func_hit_count,
        DRAW_BAR(20, 20*$hits/$max_hit_count),
        $disassm;
}
# Dumps annotated disassembly of the specified function (actually not the
# whole function but just the addresses between the first and the last hit).
#
# Parameters:
#   $name - key of the function in the per-function tables
#
# Runs "objdump -C -d -l" on the module of the function, restricted to the
# sampled address range, prints instruction lines through ANNOTATE_DISASSM
# and every other line (except "Disassembly of section" headers) unchanged.
# While doing so it accumulates hits per source line in %srcLineHits and per
# source file in %srcFileHits; until objdump reports a source location the
# hits are booked under "func# <key>" and the module path.
#
# Does nothing if the function has no recorded start/end address, if its
# start address is 0, or if the range is 10MB or more. Dies with "No objdump"
# if objdump cannot be started.
#
# objdump prints module-relative addresses whereas %addrHits is keyed by the
# absolute sampled address, so the module base is added back before every
# lookup. objdump is run through the list form of a pipe open so the module
# path is never interpreted by a shell.
sub DumpDisasm($) {
    my ($name) = @_;
    return unless exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name} != 0;

    my $module        = $funcModule{$name};
    my $modBase       = $moduleBaseAddr{$module} || 0;
    my $start_address = $funcStart{$name} - $modBase;
    my $stop_address  = $funcEnd{$name} - $modBase + 1;

    # Don't try to disassemble 10MB or more, because most likely it's a bug.
    return unless $stop_address - $start_address < 10000000;

    my $func_hit_count = $funcHits{$name};
    open(my $OBJDUMP, '-|', 'objdump', '-C', '-d', '-l',
         "--start-address=$start_address", "--stop-address=$stop_address", $module)
        or die "No objdump";

    my $srcLine = "func# " . $name;
    my $srcFile = $module;
    while (my $objdump_line = <$OBJDUMP>) {
        # Section headers are dropped from the output.
        next if $objdump_line =~ /^Disassembly of section/;

        if ($objdump_line =~ m/^\s*([0-9a-fA-F]+):\s*(.*)/) {
            # Instruction line: "<address>: <bytes and mnemonic>".
            my $text      = $2;
            my $addr      = hex($1) + $modBase;
            my $hit_count = $addrHits{$addr};
            if (defined $hit_count && $hit_count > 0) {
                $srcLineHits{$srcLine} += $hit_count;
                $srcFileHits{$srcFile} += $hit_count;
            }
            ANNOTATE_DISASSM($addr, $text, $funcHottestCount{$name}, $func_hit_count);
        } elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) {
            # Source location line: "/path/to/file:<line number>".
            $srcFile = $1;
            $srcLine = $objdump_line;
            chomp($srcLine);
            print $objdump_line;
        } else {
            print $objdump_line;
        }
    }
    close $OBJDUMP;
}
# Dumps disassembly for the top N hot functions.
#
# Parameters:
#   $maxCnt - maximum number of functions to dump
#
# First prints the index of the last entry of the ranking on a line of its
# own. Then, for every selected function, prints a header with hit count,
# share of all hits, base/start/end address and demangled name, followed by
# the annotated disassembly produced by DumpDisasm.
sub DumpFuncDissasm($) {
    my ($maxCnt) = @_;

    my @ranked = sort { $funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
    print $#ranked, "\n";

    my $dumped = 0;
    foreach my $func (@ranked) {
        last unless $dumped < $maxCnt;
        $dumped++;

        print "\n--------------------------------------------------------------------------------------------------------------\n";
        printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
            $funcHits{$func}, 100*$funcHits{$func}/$allHits, $funcBaseAddrs{$func}, $funcStart{$func}, $funcEnd{$func}, DEMANGLE($funcNames{$func});
        print "--------------------------------------------------------------------------------------------------------------\n";
        DumpDisasm($func);
    }
}
# Dumps the top N hot source files.
#
# Parameters:
#   $maxCnt - maximum number of source files to print
#
# Files are ranked by the hits accumulated in %srcFileHits, hottest first.
# Each line shows the hit count, the share of all hits, a bar and the file.
sub DumpSrcFiles($) {
    my ($maxCnt) = @_;

    my @ranked = sort { $srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits;
    my $printed = 0;
    foreach my $file (@ranked) {
        last unless $printed < $maxCnt;
        $printed++;

        my $hits = $srcFileHits{$file};
        printf "%12d\t%6.2f%% |%s %s\n", $hits, 100*$hits/$allHits, DRAW_BAR(20, 20*$hits/$allHits), $file;
    }
}
# Dumps the top N hot source lines.
#
# Parameters:
#   $maxCnt - maximum number of source lines to print
#
# Lines are ranked by the hits accumulated in %srcLineHits, hottest first.
# Each output line shows the hit count, the share of all hits, a bar and the
# source line label.
sub DumpSrcLines($) {
    my ($maxCnt) = @_;

    my @ranked = sort { $srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits;
    my $printed = 0;
    foreach my $label (@ranked) {
        last unless $printed < $maxCnt;
        $printed++;

        my $hits = $srcLineHits{$label};
        printf "%12d\t%6.2f%% |%s %s\n", $hits, 100*$hits/$allHits, DRAW_BAR(20, 20*$hits/$allHits), $label;
    }
}
# Entry point: reads the symbols of the binary and the profile, then prints
# the module, function, hotspot, disassembly and source reports in turn.
# Both command line arguments are required; without them the script stops
# with a usage message instead of a bare "couldn't open the file!".
if (@ARGV < 2) {
    die "Usage: $0 <binary_name> <profile_name>\n";
}

ReadFunctionList($ARGV[0], 0);
ReadSamples();

print "\nModules:\n";
DumpModules();

print "\nHot functions:\n";
DumpHotFunc(100);

print "\nHotspots:\n";
DumpHotSpots(100);

DumpFuncDissasm(100);

# The source reports rely on the tables filled while disassembling, so they
# have to come after DumpFuncDissasm.
print "\nHot src files:\n";
DumpSrcFiles(100);

print "\nHot src lines:\n";
DumpSrcLines(100);

# Debugging aid, disabled: lists every function that received hits, ordered
# by base address.
# my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits;
# printf "%d\n", $#funcs;
# for (my $i = 0; $i <= $#funcs; ++$i) {
#     my $f = $funcs[$i];
#     printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
#         $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f};
#     #DumpDisasm($f);
# }