annotate_profile.pl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360
  1. #!/usr/bin/env perl
  2. #
  3. # Takes profile file as an input and prints out annotated disassmebly
  4. # Usage:
  5. # ./annotate_profile.pl <binary_name> <profile_name>
  6. #
  7. # Function to draw bar of the specified length filled up to specified length
  8. sub DRAW_BAR($$) {
  9. my ($length, $filled) = @_;
  10. my $bar = "";
  11. --$filled;
  12. while ($filled > 0) {
  13. $bar = $bar . "X";
  14. $length--;
  15. $filled--;
  16. }
  17. while ($length > 0) {
  18. $bar = $bar . " ";
  19. $length--;
  20. }
  21. return $bar;
  22. }
  23. my $curFunc = "";
  24. my $curModule = "";
  25. my $allHits = 0;
  26. my %moduleHits;
  27. my %funcModule;
  28. my %funcHits;
  29. my %funcHottestCount;
  30. my %funcStart;
  31. my %funcEnd;
  32. my %funcNames;
  33. my %funcBaseAddrs;
  34. my %funcSizes;
  35. my %addrHits;
  36. my %addrFunc;
  37. my %moduleBaseAddr;
  38. my @funcSortByAddr;
  39. my %demangledNames;
  40. my %srcLineHits;
  41. my %srcFileHits;
  42. # Demagles C++ function name
  43. sub DEMANGLE($) {
  44. my ($name) = @_;
  45. if (exists $demangledNames{$name}) {
  46. return $demangledNames{$name};
  47. }
  48. if ($name =~ /^_Z/) {
  49. my $cmd = "c++filt -p \'$name\' |";
  50. open(my $RES, $cmd ) || die "No c++filt";
  51. my $demangled_name = <$RES>;
  52. chomp($demangled_name);
  53. close $RES;
  54. if (length($demangled_name) !=0) {
  55. $name = $demangled_name;
  56. }
  57. }
  58. return $name;
  59. }
  60. # Saves function info
  61. sub AddFunc($$$$$)
  62. {
  63. my ($func, $bin_file, $baseAddr, $size, $name) = @_;
  64. $funcModule{$func} = $bin_file;
  65. $funcBaseAddrs{$func} = $baseAddr;
  66. # A function with the same base address can be mentioned multiple times with different sizes (0, and non-0, WTF??)
  67. if ((! exists $funcSizes{$func}) || ($funcSizes{$func} < $size)) {
  68. $funcSizes{$func} = $size;
  69. }
  70. $funcNames{$func} = $name;
  71. $funcStart{$func} = $func;
  72. # printf "%08x\t%08x\t%016x\t%s\t%s\n",
  73. # $funcBaseAddrs{$func}, $funcSizes{$func}, $moduleBaseAddr, $funcModule{$func}, $funcNames{$func};
  74. }
  75. # Reads list of all functions in a module
  76. sub ReadFunctionList($$) {
  77. my ($bin_file, $moduleBaseAddr) = @_;
  78. if (! -e $bin_file) {
  79. return;
  80. }
  81. my $readelf_cmd = "readelf -W -s $bin_file |";
  82. # print "$readelf_cmd\n";
  83. my $IN_FILE;
  84. open($IN_FILE, $readelf_cmd) || die "couldn't open the file!";
  85. while (my $line = <$IN_FILE>) {
  86. chomp($line);
  87. # " 33: 00000000000a0fc0 433 FUNC GLOBAL DEFAULT 10 getipnodebyaddr@@FBSD_1.0"
  88. if ($line =~ m/^\s*\d+:\s+([0-9a-fA-F]+)\s+(\d+)\s+FUNC\s+\w+\s+DEFAULT\s+\d+\s+(.*)$/) {
  89. # Read function info
  90. my $name = $3;
  91. my $baseAddr = hex($1) + $moduleBaseAddr;
  92. my $func = $baseAddr;
  93. my $size = $2;
  94. AddFunc($func, $bin_file, $baseAddr, $size, $name);
  95. }
  96. }
  97. close($IN_FILE);
  98. @funcSortByAddr = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcBaseAddrs;
  99. # printf "%016x\t%s\t%d\n", $moduleBaseAddr, $bin_file, $#funcSortByAddr+1;
  100. }
  101. # Reads the profile and attributes address hits to the functions
  102. sub ReadSamples() {
  103. # First pass saves all samples in a hash-table
  104. my $samples_file = $ARGV[1];
  105. my $IN_FILE;
  106. open($IN_FILE, $samples_file)|| die "couldn't open the file!";
  107. my $curFuncInd = 0;
  108. my $curFunc = 0;
  109. my $curFuncBegin = 0;
  110. my $curFuncEnd = 0;
  111. my $curModule = "";
  112. my $curModuleBase = 0;
  113. my $read_samples = 0;
  114. my $samplesStarted = 0;
  115. while (my $line = <$IN_FILE>) {
  116. chomp($line);
  117. if ($line =~ m/^samples:\s+(\d+)\s+unique:\s+(\d+)\s+dropped:\s+(\d+)\s+searchskips:\s+(\d+)$/) {
  118. $total_samples = $1;
  119. $unique_samples = $2;
  120. $dropped_samples = $3;
  121. $search_skips = $4;
  122. next;
  123. }
  124. if ($line =~ m/^Samples:$/) {
  125. $samplesStarted = 1;
  126. next;
  127. } elsif (!$samplesStarted) {
  128. print "$line\n";
  129. next;
  130. }
  131. # print "$line\n";
  132. if ($line =~ m/^Func\t\d+/) {
  133. # "Func 2073 0x803323000 0x803332fd0 /lib/libthr.so.3 pthread_cond_init"
  134. my @cols = split(/\t/, $line);
  135. $curModule = $cols[4];
  136. $curModuleBase = hex($cols[2]);
  137. if (0x400000 == $curModuleBase) {
  138. $curModuleBase = 0;
  139. }
  140. $curFunc = hex($cols[3]);
  141. if (! exists $moduleBaseAddr{$curModule}) {
  142. $moduleBaseAddr{$curModule} = $curModuleBase;
  143. ReadFunctionList($curModule, $curModuleBase);
  144. }
  145. if (! exists $funcNames{$curFunc}) {
  146. my $name = sprintf("unknown_0x%08x", $curFunc);
  147. AddFunc($curFunc, $curModule, $curFunc, 0, $name);
  148. }
  149. } elsif ($line =~ m/^\d+\t0x([0-9,a-f,A-F]+)\t(\d+)/) {
  150. # Read one sample for the current function
  151. $read_samples++;
  152. my $addr = hex($1);
  153. # print "$addr\n";
  154. if ($addr >= $curFuncEnd) {
  155. # Find the function the current address belongs to
  156. while ($curFuncInd <= $#funcSortByAddr) {
  157. my $f = $funcSortByAddr[$curFuncInd];
  158. my $begin = $funcBaseAddrs{$f};
  159. my $end = $funcBaseAddrs{$f} + $funcSizes{$f};
  160. if ($begin <= $addr and $addr < $end) {
  161. $curFunc = $f;
  162. $funcStart{$curFunc} = $addr;
  163. $curFuncBegin = $begin;
  164. $curFuncEnd = $end;
  165. last;
  166. } elsif ($addr < $begin) {
  167. # printf "X3: func:%08x\tname:%s\tbase:%08x\tsize:%08x\t%s\nline:%s\n",
  168. # $curFunc, $funcNames{$curFunc}, $funcBaseAddrs{$curFunc}, $funcSizes{$curFunc}, $curModule, $line;
  169. last;
  170. }
  171. ++$curFuncInd;
  172. }
  173. }
  174. $funcHits{$curFunc} += $2;
  175. if ($funcHottestCount{$curFunc} < $2) {
  176. $funcHottestCount{$curFunc} = $2;
  177. }
  178. $addrHits{$addr} = $2;
  179. $addrFunc{$addr} = $curFunc;
  180. $funcEnd{$curFunc} = $addr;
  181. $allHits += $2;
  182. $moduleHits{$curModule} += $2;
  183. # printf "%08x\t%08x\t%08x\t%08x\t%s\n", $addr, $curFunc, $curFuncBegin, $curFuncEnd, $funcNames{$curFunc};
  184. }
  185. }
  186. close($IN_FILE);
  187. printf "\nsamples: %d unique: %d dropped: %d searchskips: %d\n", $total_samples, $unique_samples, $dropped_samples, $search_skips;
  188. if ($read_samples != $unique_samples) {
  189. printf "\n-----------------------------------------------------------------------------------------------------\n";
  190. printf "!!!!WARNING: read %d samples, expected %d samples, profiling results might be not acqurate!!!!", $read_samples, $unique_samples;
  191. printf "\n-----------------------------------------------------------------------------------------------------\n";
  192. }
  193. }
  194. # Dumps module stats
  195. sub DumpModules() {
  196. # Sort functions by hit counts and dump the list
  197. my @modules = sort {$a <=> $b } keys %moduleHits;
  198. for (my $i = 0; $i <= $#modules; ++$i) {
  199. my $m = $modules[$i];
  200. my $cnt = $moduleHits{$m};
  201. my $perc = 100.0 * $cnt / $allHits;
  202. printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $m;
  203. }
  204. }
  205. # Dumps top N hot functions
  206. sub DumpHotFunc($) {
  207. my ($maxCnt) = @_;
  208. # Sort functions by hit counts and dump the list
  209. my @hotFunc = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
  210. # print $#hotFunc;
  211. for (my $i = 0; $i <= $#hotFunc && $i < $maxCnt; ++$i) {
  212. my $f = $hotFunc[$i];
  213. my $cnt = $funcHits{$f};
  214. my $perc = 100.0 * $cnt / $allHits;
  215. printf "%12d\t%6.2f%% |%s %s\n", $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), DEMANGLE($funcNames{$f});
  216. }
  217. }
  218. # Dumps top N hotspots (hot addresses)
  219. sub DumpHotSpots($) {
  220. my ($maxCnt) = @_;
  221. # Sort addresses by hit counts and dump the list
  222. my @hotSpots = sort {$addrHits{$b} <=> $addrHits{$a} } keys %addrHits;
  223. for (my $i = 0; $i <= $#hotSpots && $i < $maxCnt; ++$i) {
  224. my $s = $hotSpots[$i];
  225. my $cnt = $addrHits{$s};
  226. my $perc = 100.0 * $cnt / $allHits;
  227. my $f = $addrFunc{$s};
  228. my $fname = $funcNames{$f};
  229. printf "%12d\t%6.2f%% |%s 0x%016x\t%s + 0x%x\n",
  230. $cnt, $perc, DRAW_BAR(20, 20*$cnt/$allHits), $s, DEMANGLE($fname), $s - $funcBaseAddrs{$f};
  231. }
  232. }
  233. # Adds hit informations to a disassembly line
  234. sub ANNOTATE_DISASSM($$$$) {
  235. my ($address, $disassm, $max_hit_count, $func_hit_count) = @_;
  236. my $hit_count = $addrHits{$address};
  237. my $perc = sprintf("% 7.2f%%", 100*$hit_count/$func_hit_count);
  238. $address = sprintf("% 8x", $address);
  239. print $address . " " . $hit_count . "\t" . $perc . " |" .
  240. DRAW_BAR(20, 20*$hit_count/$max_hit_count) . "\t" . $disassm . "\n";
  241. }
  242. # Dumps annotated disassembly of the specified function (actually not the whole function but
  243. # just the addresses between the first and last hit)
  244. sub DumpDisasm($) {
  245. my ($name) = @_;
  246. if (exists $funcStart{$name} && exists $funcEnd{$name} && $funcStart{$name}!=0) {
  247. my $module = $funcModule{$name};
  248. my $modBase = $moduleBaseAddr{$module};
  249. my $start_address = $funcStart{$name} - $modBase;
  250. my $stop_address = $funcEnd{$name} - $modBase + 1;
  251. # print " " . $funcStart{$name} . " " . $funcEnd{$name} . " $modBase ---";
  252. my $max_hit_count = $funcHits{$name};
  253. my $objdump_cmd = "objdump -C -d -l --start-address=" . $start_address .
  254. " --stop-address=" . $stop_address . " " . $module . " |";
  255. if ($stop_address - $start_address < 10000000) { # don't try to disaassemble more than 10MB, because most likely it's a bug
  256. # print STDERR $objdump_cmd . "\n";
  257. open(my $OBJDUMP, $objdump_cmd) || die "No objdump";
  258. my $srcLine = "func# ". $name;
  259. my $srcFile = $module;
  260. while (my $objdump_line = <$OBJDUMP>) {
  261. # filter disassembly lines
  262. if ($objdump_line =~ /^Disassembly of section/) {
  263. } elsif ($objdump_line =~ m/^\s*([0-9,a-f,A-F]+):\s*(.*)/) {
  264. my $addr = hex($1);
  265. my $hit_count = $addrHits{$addr};
  266. if ($hit_count > 0) {
  267. $srcLineHits{$srcLine} += $hit_count;
  268. $srcFileHits{$srcFile} += $hit_count;
  269. }
  270. ANNOTATE_DISASSM($addr + $modBase, $2, $funcHottestCount{$name}, $max_hit_count);
  271. } elsif ($objdump_line =~ m/^(\/.*):(\d+)$/) {
  272. $srcLine = $objdump_line;
  273. $srcFile = $1;
  274. chomp($srcLine);
  275. print $objdump_line;
  276. } else {
  277. print $objdump_line;
  278. }
  279. }
  280. close $OBJDUMP;
  281. }
  282. }
  283. }
  284. # Dumps disassemlby for top N hot functions
  285. sub DumpFuncDissasm($) {
  286. (my $maxCnt) = @_;
  287. my @funcs = sort {$funcHits{$b} <=> $funcHits{$a} } keys %funcHits;
  288. print $#funcs . "\n";
  289. for (my $i = 0; $i <= $#funcs && $i < $maxCnt; ++$i) {
  290. my $f = $funcs[$i];
  291. print "\n--------------------------------------------------------------------------------------------------------------\n";
  292. printf "hits:%d\t%7.2f%%\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
  293. $funcHits{$f}, 100*$funcHits{$f}/$allHits, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, DEMANGLE($funcNames{$f});
  294. print "--------------------------------------------------------------------------------------------------------------\n";
  295. DumpDisasm($f);
  296. }
  297. }
  298. sub DumpSrcFiles($) {
  299. (my $maxCnt) = @_;
  300. my @srcFiles = sort {$srcFileHits{$b} <=> $srcFileHits{$a} } keys %srcFileHits;
  301. for (my $i = 0; $i <= $#srcFiles && $i < $maxCnt; ++$i) {
  302. my $f = $srcFiles[$i];
  303. my $cnt = $srcFileHits{$f};
  304. printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $f;
  305. }
  306. }
  307. sub DumpSrcLines($) {
  308. (my $maxCnt) = @_;
  309. my @srcLines = sort {$srcLineHits{$b} <=> $srcLineHits{$a} } keys %srcLineHits;
  310. for (my $i = 0; $i <= $#srcLines && $i < $maxCnt; ++$i) {
  311. my $l = $srcLines[$i];
  312. my $cnt = $srcLineHits{$l};
  313. printf "%12d\t%6.2f%% |%s %s\n", $cnt, 100*$cnt/$allHits, DRAW_BAR(20, 20*$cnt/$allHits), $l;
  314. }
  315. }
  316. ReadFunctionList($ARGV[0], 0);
  317. ReadSamples();
  318. print "\nModules:\n";
  319. DumpModules();
  320. print "\nHot functions:\n";
  321. DumpHotFunc(100);
  322. print "\nHotspots:\n";
  323. DumpHotSpots(100);
  324. DumpFuncDissasm(100);
  325. print "\nHot src files:\n";
  326. DumpSrcFiles(100);
  327. print "\nHot src lines:\n";
  328. DumpSrcLines(100);
  329. # my @funcs = sort {$funcBaseAddrs{$a} <=> $funcBaseAddrs{$b} } keys %funcHits;
  330. # printf "%d\n", $#funcs;
  331. # for (my $i = 0; $i <= $#funcs; ++$i) {
  332. # my $f = $funcs[$i];
  333. # printf "%s\t%d\tbase:%08x\tstart:%08x\tend:%08x\t%s\n",
  334. # $funcNames{$f}, $funcHits{$f}, $funcBaseAddrs{$f}, $funcStart{$f}, $funcEnd{$f}, $funcModule{$f};
  335. # #DumpDisasm($f);
  336. # }