histogram.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package trace
  5. // This file implements histogramming for RPC statistics collection.
  6. import (
  7. "bytes"
  8. "fmt"
  9. "html/template"
  10. "log"
  11. "math"
  12. "sync"
  13. "golang.org/x/net/internal/timeseries"
  14. )
  15. const (
  16. bucketCount = 38
  17. )
  18. // histogram keeps counts of values in buckets that are spaced
  19. // out in powers of 2: 0-1, 2-3, 4-7...
  20. // histogram implements timeseries.Observable
  21. type histogram struct {
  22. sum int64 // running total of measurements
  23. sumOfSquares float64 // square of running total
  24. buckets []int64 // bucketed values for histogram
  25. value int // holds a single value as an optimization
  26. valueCount int64 // number of values recorded for single value
  27. }
  28. // addMeasurement records a value measurement observation to the histogram.
  29. func (h *histogram) addMeasurement(value int64) {
  30. // TODO: assert invariant
  31. h.sum += value
  32. h.sumOfSquares += float64(value) * float64(value)
  33. bucketIndex := getBucket(value)
  34. if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
  35. h.value = bucketIndex
  36. h.valueCount++
  37. } else {
  38. h.allocateBuckets()
  39. h.buckets[bucketIndex]++
  40. }
  41. }
  42. func (h *histogram) allocateBuckets() {
  43. if h.buckets == nil {
  44. h.buckets = make([]int64, bucketCount)
  45. h.buckets[h.value] = h.valueCount
  46. h.value = 0
  47. h.valueCount = -1
  48. }
  49. }
  50. func log2(i int64) int {
  51. n := 0
  52. for ; i >= 0x100; i >>= 8 {
  53. n += 8
  54. }
  55. for ; i > 0; i >>= 1 {
  56. n += 1
  57. }
  58. return n
  59. }
  60. func getBucket(i int64) (index int) {
  61. index = log2(i) - 1
  62. if index < 0 {
  63. index = 0
  64. }
  65. if index >= bucketCount {
  66. index = bucketCount - 1
  67. }
  68. return
  69. }
  70. // Total returns the number of recorded observations.
  71. func (h *histogram) total() (total int64) {
  72. if h.valueCount >= 0 {
  73. total = h.valueCount
  74. }
  75. for _, val := range h.buckets {
  76. total += int64(val)
  77. }
  78. return
  79. }
  80. // Average returns the average value of recorded observations.
  81. func (h *histogram) average() float64 {
  82. t := h.total()
  83. if t == 0 {
  84. return 0
  85. }
  86. return float64(h.sum) / float64(t)
  87. }
  88. // Variance returns the variance of recorded observations.
  89. func (h *histogram) variance() float64 {
  90. t := float64(h.total())
  91. if t == 0 {
  92. return 0
  93. }
  94. s := float64(h.sum) / t
  95. return h.sumOfSquares/t - s*s
  96. }
  97. // StandardDeviation returns the standard deviation of recorded observations.
  98. func (h *histogram) standardDeviation() float64 {
  99. return math.Sqrt(h.variance())
  100. }
  101. // PercentileBoundary estimates the value that the given fraction of recorded
  102. // observations are less than.
  103. func (h *histogram) percentileBoundary(percentile float64) int64 {
  104. total := h.total()
  105. // Corner cases (make sure result is strictly less than Total())
  106. if total == 0 {
  107. return 0
  108. } else if total == 1 {
  109. return int64(h.average())
  110. }
  111. percentOfTotal := round(float64(total) * percentile)
  112. var runningTotal int64
  113. for i := range h.buckets {
  114. value := h.buckets[i]
  115. runningTotal += value
  116. if runningTotal == percentOfTotal {
  117. // We hit an exact bucket boundary. If the next bucket has data, it is a
  118. // good estimate of the value. If the bucket is empty, we interpolate the
  119. // midpoint between the next bucket's boundary and the next non-zero
  120. // bucket. If the remaining buckets are all empty, then we use the
  121. // boundary for the next bucket as the estimate.
  122. j := uint8(i + 1)
  123. min := bucketBoundary(j)
  124. if runningTotal < total {
  125. for h.buckets[j] == 0 {
  126. j++
  127. }
  128. }
  129. max := bucketBoundary(j)
  130. return min + round(float64(max-min)/2)
  131. } else if runningTotal > percentOfTotal {
  132. // The value is in this bucket. Interpolate the value.
  133. delta := runningTotal - percentOfTotal
  134. percentBucket := float64(value-delta) / float64(value)
  135. bucketMin := bucketBoundary(uint8(i))
  136. nextBucketMin := bucketBoundary(uint8(i + 1))
  137. bucketSize := nextBucketMin - bucketMin
  138. return bucketMin + round(percentBucket*float64(bucketSize))
  139. }
  140. }
  141. return bucketBoundary(bucketCount - 1)
  142. }
  143. // Median returns the estimated median of the observed values.
  144. func (h *histogram) median() int64 {
  145. return h.percentileBoundary(0.5)
  146. }
  147. // Add adds other to h.
  148. func (h *histogram) Add(other timeseries.Observable) {
  149. o := other.(*histogram)
  150. if o.valueCount == 0 {
  151. // Other histogram is empty
  152. } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
  153. // Both have a single bucketed value, aggregate them
  154. h.valueCount += o.valueCount
  155. } else {
  156. // Two different values necessitate buckets in this histogram
  157. h.allocateBuckets()
  158. if o.valueCount >= 0 {
  159. h.buckets[o.value] += o.valueCount
  160. } else {
  161. for i := range h.buckets {
  162. h.buckets[i] += o.buckets[i]
  163. }
  164. }
  165. }
  166. h.sumOfSquares += o.sumOfSquares
  167. h.sum += o.sum
  168. }
  169. // Clear resets the histogram to an empty state, removing all observed values.
  170. func (h *histogram) Clear() {
  171. h.buckets = nil
  172. h.value = 0
  173. h.valueCount = 0
  174. h.sum = 0
  175. h.sumOfSquares = 0
  176. }
  177. // CopyFrom copies from other, which must be a *histogram, into h.
  178. func (h *histogram) CopyFrom(other timeseries.Observable) {
  179. o := other.(*histogram)
  180. if o.valueCount == -1 {
  181. h.allocateBuckets()
  182. copy(h.buckets, o.buckets)
  183. }
  184. h.sum = o.sum
  185. h.sumOfSquares = o.sumOfSquares
  186. h.value = o.value
  187. h.valueCount = o.valueCount
  188. }
  189. // Multiply scales the histogram by the specified ratio.
  190. func (h *histogram) Multiply(ratio float64) {
  191. if h.valueCount == -1 {
  192. for i := range h.buckets {
  193. h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
  194. }
  195. } else {
  196. h.valueCount = int64(float64(h.valueCount) * ratio)
  197. }
  198. h.sum = int64(float64(h.sum) * ratio)
  199. h.sumOfSquares = h.sumOfSquares * ratio
  200. }
  201. // New creates a new histogram.
  202. func (h *histogram) New() timeseries.Observable {
  203. r := new(histogram)
  204. r.Clear()
  205. return r
  206. }
  207. func (h *histogram) String() string {
  208. return fmt.Sprintf("%d, %f, %d, %d, %v",
  209. h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
  210. }
  211. // round returns the closest int64 to the argument
  212. func round(in float64) int64 {
  213. return int64(math.Floor(in + 0.5))
  214. }
  215. // bucketBoundary returns the first value in the bucket.
  216. func bucketBoundary(bucket uint8) int64 {
  217. if bucket == 0 {
  218. return 0
  219. }
  220. return 1 << bucket
  221. }
  222. // bucketData holds data about a specific bucket for use in distTmpl.
  223. type bucketData struct {
  224. Lower, Upper int64
  225. N int64
  226. Pct, CumulativePct float64
  227. GraphWidth int
  228. }
  229. // data holds data about a Distribution for use in distTmpl.
  230. type data struct {
  231. Buckets []*bucketData
  232. Count, Median int64
  233. Mean, StandardDeviation float64
  234. }
  235. // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
  236. const maxHTMLBarWidth = 350.0
  237. // newData returns data representing h for use in distTmpl.
  238. func (h *histogram) newData() *data {
  239. // Force the allocation of buckets to simplify the rendering implementation
  240. h.allocateBuckets()
  241. // We scale the bars on the right so that the largest bar is
  242. // maxHTMLBarWidth pixels in width.
  243. maxBucket := int64(0)
  244. for _, n := range h.buckets {
  245. if n > maxBucket {
  246. maxBucket = n
  247. }
  248. }
  249. total := h.total()
  250. barsizeMult := maxHTMLBarWidth / float64(maxBucket)
  251. var pctMult float64
  252. if total == 0 {
  253. pctMult = 1.0
  254. } else {
  255. pctMult = 100.0 / float64(total)
  256. }
  257. buckets := make([]*bucketData, len(h.buckets))
  258. runningTotal := int64(0)
  259. for i, n := range h.buckets {
  260. if n == 0 {
  261. continue
  262. }
  263. runningTotal += n
  264. var upperBound int64
  265. if i < bucketCount-1 {
  266. upperBound = bucketBoundary(uint8(i + 1))
  267. } else {
  268. upperBound = math.MaxInt64
  269. }
  270. buckets[i] = &bucketData{
  271. Lower: bucketBoundary(uint8(i)),
  272. Upper: upperBound,
  273. N: n,
  274. Pct: float64(n) * pctMult,
  275. CumulativePct: float64(runningTotal) * pctMult,
  276. GraphWidth: int(float64(n) * barsizeMult),
  277. }
  278. }
  279. return &data{
  280. Buckets: buckets,
  281. Count: total,
  282. Median: h.median(),
  283. Mean: h.average(),
  284. StandardDeviation: h.standardDeviation(),
  285. }
  286. }
  287. func (h *histogram) html() template.HTML {
  288. buf := new(bytes.Buffer)
  289. if err := distTmpl().Execute(buf, h.newData()); err != nil {
  290. buf.Reset()
  291. log.Printf("net/trace: couldn't execute template: %v", err)
  292. }
  293. return template.HTML(buf.String())
  294. }
  295. var distTmplCache *template.Template
  296. var distTmplOnce sync.Once
  297. func distTmpl() *template.Template {
  298. distTmplOnce.Do(func() {
  299. // Input: data
  300. distTmplCache = template.Must(template.New("distTmpl").Parse(`
  301. <table>
  302. <tr>
  303. <td style="padding:0.25em">Count: {{.Count}}</td>
  304. <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
  305. <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
  306. <td style="padding:0.25em">Median: {{.Median}}</td>
  307. </tr>
  308. </table>
  309. <hr>
  310. <table>
  311. {{range $b := .Buckets}}
  312. {{if $b}}
  313. <tr>
  314. <td style="padding:0 0 0 0.25em">[</td>
  315. <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
  316. <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
  317. <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
  318. <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
  319. <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
  320. <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
  321. </tr>
  322. {{end}}
  323. {{end}}
  324. </table>
  325. `))
  326. })
  327. return distTmplCache
  328. }