stackcollapse-sample.awk 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231
  1. #!/usr/bin/awk -f
  2. #
  3. # Uses MacOS' /usr/bin/sample to generate a flamegraph of a process
  4. #
  5. # Usage:
  6. #
  7. # sudo sample [pid] -file /dev/stdout | stackcollapse-sample.awk | flamegraph.pl
  8. #
  9. # Options:
  10. #
  11. # The output will show the name of the library/framework at the call-site
  12. # with the form AppKit`NSApplication or libsystem`start_wqthread.
  13. #
  14. # If showing the framework or library name is not required, pass
  15. # MODULES=0 as an argument to this script (not to 'sample').
  16. #
  17. # The collapsed stacks will be written to standard output, and can be piped
  18. # into flamegraph.pl directly, or written to a file for conversion later.
  19. #
  20. # ---
  21. #
  22. # Copyright (c) 2017, Apple Inc.
  23. #
  24. # Redistribution and use in source and binary forms, with or without
  25. # modification, are permitted provided that the following conditions are met:
  26. #
  27. # 1. Redistributions of source code must retain the above copyright notice,
  28. # this list of conditions and the following disclaimer.
  29. #
  30. # 2. Redistributions in binary form must reproduce the above copyright notice,
  31. # this list of conditions and the following disclaimer in the documentation
  32. # and/or other materials provided with the distribution.
  33. #
  34. # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  35. # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  36. # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  37. # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  38. # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  39. # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  40. # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  41. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  42. # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  43. # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  44. # POSSIBILITY OF SUCH DAMAGE.
  45. #
  46. BEGIN {
  47. # Command line options
  48. MODULES = 1 # Allows the user to enable/disable printing of modules.
  49. # Internal variables
  50. _FOUND_STACK = 0 # Found the stack traces in the output.
  51. _LEVEL = -1 # The current level of indentation we are running.
  52. # The set of symbols to ignore for 'waiting' threads, for ease of use.
  53. # This will hide waiting threads from the view, making it easier to
  54. # see what is actually running in the sample. These may be adjusted
  55. # as necessary or appended to if other symbols need to be filtered out.
  56. _IGNORE["libsystem_kernel`__psynch_cvwait"] = 1
  57. _IGNORE["libsystem_kernel`__select"] = 1
  58. _IGNORE["libsystem_kernel`__semwait_signal"] = 1
  59. _IGNORE["libsystem_kernel`__ulock_wait"] = 1
  60. _IGNORE["libsystem_kernel`__wait4"] = 1
  61. _IGNORE["libsystem_kernel`__workq_kernreturn"] = 1
  62. _IGNORE["libsystem_kernel`kevent"] = 1
  63. _IGNORE["libsystem_kernel`mach_msg_trap"] = 1
  64. _IGNORE["libsystem_kernel`read"] = 1
  65. _IGNORE["libsystem_kernel`semaphore_wait_trap"] = 1
  66. # The same set of symbols as above, without the module name.
  67. _IGNORE["__psynch_cvwait"] = 1
  68. _IGNORE["__select"] = 1
  69. _IGNORE["__semwait_signal"] = 1
  70. _IGNORE["__ulock_wait"] = 1
  71. _IGNORE["__wait4"] = 1
  72. _IGNORE["__workq_kernreturn"] = 1
  73. _IGNORE["kevent"] = 1
  74. _IGNORE["mach_msg_trap"] = 1
  75. _IGNORE["read"] = 1
  76. _IGNORE["semaphore_wait_trap"] = 1
  77. }
  78. # This is the first line in the /usr/bin/sample output that indicates the
  79. # samples follow subsequently. Until we see this line, the rest is ignored.
  80. /^Call graph/ {
  81. _FOUND_STACK = 1
  82. }
  83. # This is found when we have reached the end of the stack output.
  84. # Identified by the string "Total number in stack (...)".
  85. /^Total number/ {
  86. _FOUND_STACK = 0
  87. printStack(_NEST,0)
  88. }
  89. # Prints the stack from FROM to TO (where FROM > TO)
  90. # Called when indenting back from a previous level, or at the end
  91. # of processing to flush the last recorded sample
  92. function printStack(FROM,TO) {
  93. # We ignore certain blocking wait states, in the absence of being
  94. # able to filter these threads from collection, otherwise
  95. # we'll end up with many threads of equal length that represent
  96. # the total time the sample was collected.
  97. #
  98. # Note that we need to collect the information to ensure that the
  99. # timekeeping for the parental functions is appropriately adjusted
  100. # so we just avoid printing it out when that occurs.
  101. _PRINT_IT = !_IGNORE[_NAMES[FROM]]
  102. # We run through all the names, from the root to the leaf, so that
  103. # we generate a line that flamegraph.pl will like, of the form:
  104. # Thread1234;example`main;example`otherFn 1234
  105. for(l = FROM; l>=TO; l--) {
  106. if (_PRINT_IT) {
  107. printf("%s", _NAMES[0])
  108. for(i=1; i<=l; i++) {
  109. printf(";%s", _NAMES[i])
  110. }
  111. print " " _TIMES[l]
  112. }
  113. # We clean up our current state to avoid bugs.
  114. delete _NAMES[l]
  115. delete _TIMES[l]
  116. }
  117. }
  118. # This is where we process each line, of the form:
  119. # 5130 Thread_8749954
  120. # + 5130 start_wqthread (in libsystem_pthread.dylib) ...
  121. # + 4282 _pthread_wqthread (in libsystem_pthread.dylib) ...
  122. # + ! 4282 __doworkq_kernreturn (in libsystem_kernel.dylib) ...
  123. # + 848 _pthread_wqthread (in libsystem_pthread.dylib) ...
  124. # + 848 __doworkq_kernreturn (in libsystem_kernel.dylib) ...
  125. _FOUND_STACK && match($0,/^ [^0-9]*[0-9]/) {
  126. # We maintain two counters:
  127. # _LEVEL: the high water mark of the indentation level we have seen.
  128. # _NEST: the current indentation level.
  129. #
  130. # We keep track of these two levels such that when the nesting level
  131. # decreases, we print out the current state of where we are.
  132. _NEST=(RLENGTH-5)/2
  133. sub(/^[^0-9]*/,"") # Normalise the leading content so we start with time.
  134. _TIME=$1 # The time recorded by 'sample', first integer value.
  135. # The function name is in one or two parts, depending on what kind of
  136. # function it is.
  137. #
  138. # If it is a standard C or C++ function, it will be of the form:
  139. # exampleFunction
  140. # Example::Function
  141. #
  142. # If it is an Objective-C funtion, it will be of the form:
  143. # -[NSExample function]
  144. # +[NSExample staticFunction]
  145. # -[NSExample function:withParameter]
  146. # +[NSExample staticFunction:withParameter:andAnother]
  147. _FN1 = $2
  148. _FN2 = $3
  149. # If it is a standard C or C++ function then the following word will
  150. # either be blank, or the text '(in', so we jut use the first one:
  151. if (_FN2 == "(in" || _FN2 == "") {
  152. _FN =_FN1
  153. } else {
  154. # Otherwise we concatenate the first two parts with .
  155. _FN = _FN1 "." _FN2
  156. }
  157. # Modules are shown with '(in libfoo.dylib)' or '(in AppKit)'
  158. _MODULE = ""
  159. match($0, /\(in [^)]*\)/)
  160. if (RSTART > 0 && MODULES) {
  161. # Strip off the '(in ' (4 chars) and the final ')' char (1 char)
  162. _MODULE = substr($0, RSTART+4, RLENGTH-5)
  163. # Remove the .dylib function, since it adds no value.
  164. gsub(/\.dylib/, "", _MODULE)
  165. # The function name is 'module`functionName'
  166. _FN = _MODULE "`" _FN
  167. }
  168. # Now we have set up the variables, we can decide how to apply it
  169. # If we are descending in the nesting, we don't print anything out:
  170. # a
  171. # ab
  172. # abc
  173. #
  174. # We only print out something when we go back a level, or hit the end:
  175. # abcd
  176. # abe < prints out the stack up until this point, i.e. abcd
  177. # We store a pair of arrays, indexed by the nesting level:
  178. #
  179. # _TIMES - a list of the time reported to that function
  180. # _NAMES - a list of the function names for each current stack trace
  181. # If we are backtracking, we need to flush the current output.
  182. if (_NEST <= _LEVEL) {
  183. printStack(_LEVEL,_NEST)
  184. }
  185. # Record the name and time of the function where we are.
  186. _NAMES[_NEST] = _FN
  187. _TIMES[_NEST] = _TIME
  188. # We subtract the time we took from our parent so we don't double count.
  189. if (_NEST > 0) {
  190. _TIMES[_NEST-1] -= _TIME
  191. }
  192. # Raise the high water mark of the level we have reached.
  193. _LEVEL = _NEST
  194. }