# type: ignore

"""
report.py - Utilities for reporting statistics about benchmark results
"""

import copy
import os
import random
import re
import unittest

from numpy import array
from scipy.stats import gmean, mannwhitneyu


class BenchmarkColor(object):
    def __init__(self, name, code):
        self.name = name
        self.code = code

    def __repr__(self):
        return "%s%r" % (self.__class__.__name__, (self.name, self.code))

    def __format__(self, format):
        return self.code


# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor("NONE", "")
BC_MAGENTA = BenchmarkColor("MAGENTA", "\033[95m")
BC_CYAN = BenchmarkColor("CYAN", "\033[96m")
BC_OKBLUE = BenchmarkColor("OKBLUE", "\033[94m")
BC_OKGREEN = BenchmarkColor("OKGREEN", "\033[32m")
BC_HEADER = BenchmarkColor("HEADER", "\033[92m")
BC_WARNING = BenchmarkColor("WARNING", "\033[93m")
BC_WHITE = BenchmarkColor("WHITE", "\033[97m")
BC_FAIL = BenchmarkColor("FAIL", "\033[91m")
BC_ENDC = BenchmarkColor("ENDC", "\033[0m")
BC_BOLD = BenchmarkColor("BOLD", "\033[1m")
BC_UNDERLINE = BenchmarkColor("UNDERLINE", "\033[4m")

UTEST_MIN_REPETITIONS = 2
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"

_TIME_UNIT_TO_SECONDS_MULTIPLIER = {
    "s": 1.0,
    "ms": 1e-3,
    "us": 1e-6,
    "ns": 1e-9,
}


def color_format(use_color, fmt_str, *args, **kwargs):
    """
    Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
    'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
    is False then all color codes in 'args' and 'kwargs' are replaced with
    the empty string.
    """
    assert use_color is True or use_color is False
    if not use_color:
        args = [
            arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for arg in args
        ]
        kwargs = {
            key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
            for key, arg in kwargs.items()
        }
    return fmt_str.format(*args, **kwargs)
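
# Illustrative usage of color_format (assumed example, not executed at import
# time): when use_color is False every BenchmarkColor argument collapses to
# BC_NONE, so the same format string yields plain text, e.g.
#   color_format(True, "{}fast{endc}", BC_OKGREEN, endc=BC_ENDC)
#       -> "\033[32mfast\033[0m"
#   color_format(False, "{}fast{endc}", BC_OKGREEN, endc=BC_ENDC)
#       -> "fast"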


def find_longest_name(benchmark_list):
    """
    Return the length of the longest benchmark name in a given list of
    benchmark JSON objects.
    """
    longest_name = 1
    for bc in benchmark_list:
        if len(bc["name"]) > longest_name:
            longest_name = len(bc["name"])
    return longest_name


def calculate_change(old_val, new_val):
    """
    Return a float representing the decimal change between old_val and new_val.
    """
    if old_val == 0 and new_val == 0:
        return 0.0
    if old_val == 0:
        return float(new_val - old_val) / (float(old_val + new_val) / 2)
    return float(new_val - old_val) / abs(old_val)
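
# Worked examples for calculate_change (illustrative only): the result is a
# signed fraction of the old value, so
#   calculate_change(100, 110) ==  0.1   # 10% regression
#   calculate_change(100, 90)  == -0.1   # 10% improvement
#   calculate_change(0, 5)     ==  2.0   # old value is zero, so the change is
#                                        # taken relative to the midpoint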


def filter_benchmark(json_orig, family, replacement=""):
    """
    Apply a filter to the json, and only leave the 'family' of benchmarks.
    """
    regex = re.compile(family)
    filtered = {}
    filtered["benchmarks"] = []
    for be in json_orig["benchmarks"]:
        if not regex.search(be["name"]):
            continue
        filteredbench = copy.deepcopy(be)  # Do NOT modify the old name!
        filteredbench["name"] = regex.sub(replacement, filteredbench["name"])
        filtered["benchmarks"].append(filteredbench)
    return filtered
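
# Illustrative usage of filter_benchmark (mirrors the unit tests below): keep
# only benchmarks whose names match the 'family' regex and rewrite the matched
# part, so that two families become comparable under a common name, e.g.
#   lhs = filter_benchmark(json, "BM_Z.ro", ".")
#   rhs = filter_benchmark(json, "BM_O.e", ".")
#   # "BM_Zero/4" and "BM_One/4" both end up named "./4"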


def get_unique_benchmark_names(json):
    """
    While *keeping* the order, give all the unique 'names' used for benchmarks.
    """
    seen = set()
    uniqued = [
        x["name"]
        for x in json["benchmarks"]
        if x["name"] not in seen and (seen.add(x["name"]) or True)
    ]
    return uniqued
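
# Illustrative example for get_unique_benchmark_names: order-preserving
# de-duplication of the benchmark names, e.g.
#   get_unique_benchmark_names(
#       {"benchmarks": [{"name": "BM_A"}, {"name": "BM_B"}, {"name": "BM_A"}]}
#   ) -> ["BM_A", "BM_B"]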


def intersect(list1, list2):
    """
    Given two lists, get a new list consisting of the elements only contained
    in *both of the input lists*, while preserving the ordering.
    """
    return [x for x in list1 if x in list2]


def is_potentially_comparable_benchmark(x):
    return "time_unit" in x and "real_time" in x and "cpu_time" in x


def partition_benchmarks(json1, json2):
    """
    While preserving the ordering, find benchmarks with the same names in
    both of the inputs, and group them.
    (i.e. partition/filter into groups with common name)
    """
    json1_unique_names = get_unique_benchmark_names(json1)
    json2_unique_names = get_unique_benchmark_names(json2)
    names = intersect(json1_unique_names, json2_unique_names)
    partitions = []
    for name in names:
        time_unit = None
        # Pick the time unit from the first entry of the lhs benchmark.
        # We should be careful not to crash with unexpected input.
        for x in json1["benchmarks"]:
            if x["name"] == name and is_potentially_comparable_benchmark(x):
                time_unit = x["time_unit"]
                break
        if time_unit is None:
            continue
        # Filter by name and time unit.
        # All the repetitions are assumed to be comparable.
        lhs = [
            x
            for x in json1["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        rhs = [
            x
            for x in json2["benchmarks"]
            if x["name"] == name and x["time_unit"] == time_unit
        ]
        partitions.append([lhs, rhs])
    return partitions


def get_timedelta_field_as_seconds(benchmark, field_name):
    """
    Get value of field_name field of benchmark, which is time with time unit
    time_unit, as time in seconds.
    """
    timedelta = benchmark[field_name]
    time_unit = benchmark.get("time_unit", "s")
    return timedelta * _TIME_UNIT_TO_SECONDS_MULTIPLIER.get(time_unit)
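
# Illustrative example for get_timedelta_field_as_seconds: the conversion uses
# the benchmark's own 'time_unit' field and falls back to seconds, e.g.
#   get_timedelta_field_as_seconds(
#       {"real_time": 250, "time_unit": "ms"}, "real_time"
#   ) -> 0.25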


def calculate_geomean(json):
    """
    Extract all real/cpu times from all the benchmarks as seconds,
    and calculate their geomean.
    """
    times = []
    for benchmark in json["benchmarks"]:
        if "run_type" in benchmark and benchmark["run_type"] == "aggregate":
            continue
        times.append(
            [
                get_timedelta_field_as_seconds(benchmark, "real_time"),
                get_timedelta_field_as_seconds(benchmark, "cpu_time"),
            ]
        )
    return gmean(times) if times else array([])


def extract_field(partition, field_name):
    # The count of elements may be different. We want *all* of them.
    lhs = [x[field_name] for x in partition[0]]
    rhs = [x[field_name] for x in partition[1]]
    return [lhs, rhs]


def calc_utest(timings_cpu, timings_time):
    min_rep_cnt = min(
        len(timings_time[0]),
        len(timings_time[1]),
        len(timings_cpu[0]),
        len(timings_cpu[1]),
    )

    # Does *everything* have at least UTEST_MIN_REPETITIONS repetitions?
    if min_rep_cnt < UTEST_MIN_REPETITIONS:
        return False, None, None

    time_pvalue = mannwhitneyu(
        timings_time[0], timings_time[1], alternative="two-sided"
    ).pvalue
    cpu_pvalue = mannwhitneyu(
        timings_cpu[0], timings_cpu[1], alternative="two-sided"
    ).pvalue

    return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
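
# Illustrative example for calc_utest (values taken from the unit tests
# below): the inputs are [lhs, rhs] lists of per-repetition timings, and the
# result is (have_optimal_repetitions, cpu_pvalue, time_pvalue), e.g.
#   calc_utest([[90, 86], [89, 72]], [[9, 8], [10, 7]])
#       -> (False, 0.6666..., 1.0)   # computed, but fewer than 9 repetitions
#   calc_utest([[90], [89]], [[9], [10]])
#       -> (False, None, None)       # fewer than UTEST_MIN_REPETITIONS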


def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
    def get_utest_color(pval):
        return BC_FAIL if pval >= utest_alpha else BC_OKGREEN

    # Check if we failed miserably with minimum required repetitions for utest
    if (
        not utest["have_optimal_repetitions"]
        and utest["cpu_pvalue"] is None
        and utest["time_pvalue"] is None
    ):
        return []

    dsc = "U Test, Repetitions: {} vs {}".format(
        utest["nr_of_repetitions"], utest["nr_of_repetitions_other"]
    )
    dsc_color = BC_OKGREEN

    # We still got some results to show but issue a warning about it.
    if not utest["have_optimal_repetitions"]:
        dsc_color = BC_WARNING
        dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
            UTEST_OPTIMAL_REPETITIONS
        )

    special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"

    return [
        color_format(
            use_color,
            special_str,
            BC_HEADER,
            "{}{}".format(bc_name, UTEST_COL_NAME),
            first_col_width,
            get_utest_color(utest["time_pvalue"]),
            utest["time_pvalue"],
            get_utest_color(utest["cpu_pvalue"]),
            utest["cpu_pvalue"],
            dsc_color,
            dsc,
            endc=BC_ENDC,
        )
    ]


def get_difference_report(json1, json2, utest=False):
    """
    Calculate and report the difference between each test of two benchmark
    runs specified as 'json1' and 'json2'. Output is another json containing
    relevant details for each test run.
    """
    assert utest is True or utest is False

    diff_report = []
    partitions = partition_benchmarks(json1, json2)
    for partition in partitions:
        benchmark_name = partition[0][0]["name"]
        label = partition[0][0]["label"] if "label" in partition[0][0] else ""
        time_unit = partition[0][0]["time_unit"]
        measurements = []
        utest_results = {}
        # Careful, we may have different repetition count.
        for i in range(min(len(partition[0]), len(partition[1]))):
            bn = partition[0][i]
            other_bench = partition[1][i]
            measurements.append(
                {
                    "real_time": bn["real_time"],
                    "cpu_time": bn["cpu_time"],
                    "real_time_other": other_bench["real_time"],
                    "cpu_time_other": other_bench["cpu_time"],
                    "time": calculate_change(
                        bn["real_time"], other_bench["real_time"]
                    ),
                    "cpu": calculate_change(
                        bn["cpu_time"], other_bench["cpu_time"]
                    ),
                }
            )

        # After processing the whole partition, if requested, do the U test.
        if utest:
            timings_cpu = extract_field(partition, "cpu_time")
            timings_time = extract_field(partition, "real_time")
            have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(
                timings_cpu, timings_time
            )
            if cpu_pvalue is not None and time_pvalue is not None:
                utest_results = {
                    "have_optimal_repetitions": have_optimal_repetitions,
                    "cpu_pvalue": cpu_pvalue,
                    "time_pvalue": time_pvalue,
                    "nr_of_repetitions": len(timings_cpu[0]),
                    "nr_of_repetitions_other": len(timings_cpu[1]),
                }

        # Store only if we had any measurements for given benchmark.
        # E.g. partition_benchmarks will filter out the benchmarks having
        # time units which are not compatible with other time units in the
        # benchmark suite.
        if measurements:
            run_type = (
                partition[0][0]["run_type"]
                if "run_type" in partition[0][0]
                else ""
            )
            aggregate_name = (
                partition[0][0]["aggregate_name"]
                if run_type == "aggregate"
                and "aggregate_name" in partition[0][0]
                else ""
            )
            diff_report.append(
                {
                    "name": benchmark_name,
                    "label": label,
                    "measurements": measurements,
                    "time_unit": time_unit,
                    "run_type": run_type,
                    "aggregate_name": aggregate_name,
                    "utest": utest_results,
                }
            )

    lhs_gmean = calculate_geomean(json1)
    rhs_gmean = calculate_geomean(json2)
    if lhs_gmean.any() and rhs_gmean.any():
        diff_report.append(
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": lhs_gmean[0],
                        "cpu_time": lhs_gmean[1],
                        "real_time_other": rhs_gmean[0],
                        "cpu_time_other": rhs_gmean[1],
                        "time": calculate_change(lhs_gmean[0], rhs_gmean[0]),
                        "cpu": calculate_change(lhs_gmean[1], rhs_gmean[1]),
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            }
        )

    return diff_report
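
# Illustrative end-to-end usage (hypothetical file names): load two Google
# Benchmark JSON outputs, diff them, and render the report, e.g.
#   import json
#   with open("baseline.json") as f:
#       lhs = json.load(f)
#   with open("contender.json") as f:
#       rhs = json.load(f)
#   diff = get_difference_report(lhs, rhs, utest=True)
#   print("\n".join(print_difference_report(diff, utest=True, use_color=False)))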


def print_difference_report(
    json_diff_report,
    include_aggregates_only=False,
    utest=False,
    utest_alpha=0.05,
    use_color=True,
):
    """
    Pretty-print the diff report produced by 'get_difference_report',
    one line per measurement, and return the lines as a list of strings.
    """
    assert utest is True or utest is False

    def get_color(res):
        if res > 0.05:
            return BC_FAIL
        elif res > -0.07:
            return BC_WHITE
        else:
            return BC_CYAN

    first_col_width = find_longest_name(json_diff_report)
    first_col_width = max(first_col_width, len("Benchmark"))
    first_col_width += len(UTEST_COL_NAME)
    first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
        "Benchmark", 12 + first_col_width
    )
    output_strs = [first_line, "-" * len(first_line)]

    fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
    for benchmark in json_diff_report:
        # *If* we were asked to only include aggregates,
        # and if it is non-aggregate, then don't print it.
        if (
            not include_aggregates_only
            or "run_type" not in benchmark
            or benchmark["run_type"] == "aggregate"
        ):
            for measurement in benchmark["measurements"]:
                output_strs += [
                    color_format(
                        use_color,
                        fmt_str,
                        BC_HEADER,
                        benchmark["name"],
                        first_col_width,
                        get_color(measurement["time"]),
                        measurement["time"],
                        get_color(measurement["cpu"]),
                        measurement["cpu"],
                        measurement["real_time"],
                        measurement["real_time_other"],
                        measurement["cpu_time"],
                        measurement["cpu_time_other"],
                        endc=BC_ENDC,
                    )
                ]

        # After processing the measurements, if requested and
        # if applicable (e.g. u-test exists for given benchmark),
        # print the U test.
        if utest and benchmark["utest"]:
            output_strs += print_utest(
                benchmark["name"],
                benchmark["utest"],
                utest_alpha=utest_alpha,
                first_col_width=first_col_width,
                use_color=use_color,
            )

    return output_strs


###############################################################################
# Unit tests


class TestGetUniqueBenchmarkNames(unittest.TestCase):
    def load_results(self):
        import json

        testInputs = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "Inputs"
        )
        testOutput = os.path.join(testInputs, "test3_run0.json")
        with open(testOutput, "r") as f:
            json = json.load(f)
        return json

    def test_basic(self):
        expect_lines = [
            "BM_One",
            "BM_Two",
            "short",  # These two are not sorted
            "medium",  # These two are not sorted
        ]
        json = self.load_results()
        output_lines = get_unique_benchmark_names(json)
        print("\n")
        print("\n".join(output_lines))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            self.assertEqual(expect_lines[i], output_lines[i])


class TestReportDifference(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test1_run1.json")
            testOutput2 = os.path.join(testInputs, "test1_run2.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_SameTimes", "+0.0000", "+0.0000", "10", "10", "10", "10"],
            ["BM_2xFaster", "-0.5000", "-0.5000", "50", "25", "50", "25"],
            ["BM_2xSlower", "+1.0000", "+1.0000", "50", "100", "50", "100"],
            [
                "BM_1PercentFaster",
                "-0.0100",
                "-0.0100",
                "100",
                "99",
                "100",
                "99",
            ],
            [
                "BM_1PercentSlower",
                "+0.0100",
                "+0.0100",
                "100",
                "101",
                "100",
                "101",
            ],
            [
                "BM_10PercentFaster",
                "-0.1000",
                "-0.1000",
                "100",
                "90",
                "100",
                "90",
            ],
            [
                "BM_10PercentSlower",
                "+0.1000",
                "+0.1000",
                "100",
                "110",
                "100",
                "110",
            ],
            [
                "BM_100xSlower",
                "+99.0000",
                "+99.0000",
                "100",
                "10000",
                "100",
                "10000",
            ],
            [
                "BM_100xFaster",
                "-0.9900",
                "-0.9900",
                "10000",
                "100",
                "10000",
                "100",
            ],
            [
                "BM_10PercentCPUToTime",
                "+0.1000",
                "-0.1000",
                "100",
                "110",
                "100",
                "90",
            ],
            ["BM_ThirdFaster", "-0.3333", "-0.3334", "100", "67", "100", "67"],
            ["BM_NotBadTimeUnit", "-0.9000", "+0.2000", "0", "0", "0", "1"],
            ["BM_hasLabel", "+0.0000", "+0.0000", "1", "1", "1", "1"],
            ["OVERALL_GEOMEAN", "-0.8113", "-0.7779", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                "name": "BM_SameTimes",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 10,
                        "real_time_other": 10,
                        "cpu_time": 10,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.5000,
                        "cpu": -0.5000,
                        "real_time": 50,
                        "real_time_other": 25,
                        "cpu_time": 50,
                        "cpu_time_other": 25,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_2xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 1.0000,
                        "cpu": 1.0000,
                        "real_time": 50,
                        "real_time_other": 100,
                        "cpu_time": 50,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.0100,
                        "cpu": -0.0100,
                        "real_time": 100,
                        "real_time_other": 98.9999999,
                        "cpu_time": 100,
                        "cpu_time_other": 98.9999999,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_1PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.0100,
                        "cpu": 0.0100,
                        "real_time": 100,
                        "real_time_other": 101,
                        "cpu_time": 100,
                        "cpu_time_other": 101,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 90,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": 0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xSlower",
                "label": "",
                "measurements": [
                    {
                        "time": 99.0000,
                        "cpu": 99.0000,
                        "real_time": 100,
                        "real_time_other": 10000,
                        "cpu_time": 100,
                        "cpu_time_other": 10000,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_100xFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9900,
                        "cpu": -0.9900,
                        "real_time": 10000,
                        "real_time_other": 100,
                        "cpu_time": 10000,
                        "cpu_time_other": 100,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_10PercentCPUToTime",
                "label": "",
                "measurements": [
                    {
                        "time": 0.1000,
                        "cpu": -0.1000,
                        "real_time": 100,
                        "real_time_other": 110,
                        "cpu_time": 100,
                        "cpu_time_other": 90,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_ThirdFaster",
                "label": "",
                "measurements": [
                    {
                        "time": -0.3333,
                        "cpu": -0.3334,
                        "real_time": 100,
                        "real_time_other": 67,
                        "cpu_time": 100,
                        "cpu_time_other": 67,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_NotBadTimeUnit",
                "label": "",
                "measurements": [
                    {
                        "time": -0.9000,
                        "cpu": 0.2000,
                        "real_time": 0.4,
                        "real_time_other": 0.04,
                        "cpu_time": 0.5,
                        "cpu_time_other": 0.6,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "BM_hasLabel",
                "label": "a label",
                "measurements": [
                    {
                        "time": 0.0000,
                        "cpu": 0.0000,
                        "real_time": 1,
                        "real_time_other": 1,
                        "cpu_time": 1,
                        "cpu_time_other": 1,
                    }
                ],
                "time_unit": "s",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 3.1622776601683826e-06,
                        "cpu_time": 3.2130844755623912e-06,
                        "real_time_other": 1.9768988699420897e-07,
                        "cpu_time_other": 2.397447755209533e-07,
                        "time": -0.8112976497120911,
                        "cpu": -0.7778551721181174,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["label"], expected["label"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test2_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        json = load_result()
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            [".", "-0.5000", "-0.5000", "10", "5", "10", "5"],
            ["./4", "-0.5000", "-0.5000", "40", "20", "40", "20"],
            ["Prefix/.", "-0.5000", "-0.5000", "20", "10", "20", "10"],
            ["Prefix/./3", "-0.5000", "-0.5000", "30", "15", "30", "15"],
            ["OVERALL_GEOMEAN", "-0.5000", "-0.5000", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": ".",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 10,
                        "real_time_other": 5,
                        "cpu_time": 10,
                        "cpu_time_other": 5,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "./4",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 40,
                        "real_time_other": 20,
                        "cpu_time": 40,
                        "cpu_time_other": 20,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/.",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 20,
                        "real_time_other": 10,
                        "cpu_time": 20,
                        "cpu_time_other": 10,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "Prefix/./3",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": -0.5,
                        "real_time": 30,
                        "real_time_other": 15,
                        "cpu_time": 30,
                        "cpu_time_other": 15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 2.213363839400641e-08,
                        "cpu_time": 2.213363839400641e-08,
                        "real_time_other": 1.1066819197003185e-08,
                        "cpu_time_other": 1.1066819197003185e-08,
                        "time": -0.5000000000000009,
                        "cpu": -0.5000000000000009,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report,
            include_aggregates_only=True,
            utest=True,
            utest_alpha=0.05,
            use_color=False,
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "time": -0.375,
                        "cpu": -0.3375,
                        "real_time": 8,
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "cpu_time_other": 53,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test3_run0.json")
            testOutput2 = os.path.join(testInputs, "test3_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [
            ["BM_One", "-0.1000", "+0.1000", "10", "9", "100", "110"],
            ["BM_Two", "+0.1111", "-0.0111", "9", "10", "90", "89"],
            ["BM_Two", "-0.1250", "-0.1628", "8", "7", "86", "72"],
            [
                "BM_Two_pvalue",
                "1.0000",
                "0.6667",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "2.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["short", "-0.1250", "-0.0625", "8", "7", "80", "75"],
            ["short", "-0.4325", "-0.1351", "8", "5", "77", "67"],
            [
                "short_pvalue",
                "0.7671",
                "0.2000",
                "U",
                "Test,",
                "Repetitions:",
                "2",
                "vs",
                "3.",
                "WARNING:",
                "Results",
                "unreliable!",
                "9+",
                "repetitions",
                "recommended.",
            ],
            ["medium", "-0.3750", "-0.3375", "8", "5", "80", "53"],
            ["OVERALL_GEOMEAN", "+1.6405", "-0.6985", "0", "0", "0", "0"],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_One",
                "measurements": [
                    {
                        "time": -0.1,
                        "cpu": 0.1,
                        "real_time": 10,
                        "real_time_other": 9,
                        "cpu_time": 100,
                        "cpu_time_other": 110,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            },
            {
                "name": "BM_Two",
                "measurements": [
                    {
                        "time": 0.1111111111111111,
                        "cpu": -0.011111111111111112,
                        "real_time": 9,
                        "real_time_other": 10,
                        "cpu_time": 90,
                        "cpu_time_other": 89,
                    },
                    {
                        "time": -0.125,
                        "cpu": -0.16279069767441862,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 86,
                        "cpu_time_other": 72,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.6666666666666666,
                    "time_pvalue": 1.0,
                },
            },
            {
                "name": "short",
                "measurements": [
                    {
                        "time": -0.125,
                        "cpu": -0.0625,
                        "real_time": 8,
                        "real_time_other": 7,
                        "cpu_time": 80,
                        "cpu_time_other": 75,
                    },
                    {
                        "time": -0.4325,
                        "cpu": -0.13506493506493514,
                        "real_time": 8,
                        "real_time_other": 4.54,
                        "cpu_time": 77,
                        "cpu_time_other": 66.6,
                    },
                ],
                "time_unit": "ns",
                "utest": {
                    "have_optimal_repetitions": False,
                    "cpu_pvalue": 0.2,
                    "time_pvalue": 0.7670968684102772,
                },
            },
            {
                "name": "medium",
                "measurements": [
                    {
                        "real_time_other": 5,
                        "cpu_time": 80,
                        "time": -0.375,
                        "real_time": 8,
                        "cpu_time_other": 53,
                        "cpu": -0.3375,
                    }
                ],
                "utest": {},
                "time_unit": "ns",
                "aggregate_name": "",
            },
            {
                "name": "OVERALL_GEOMEAN",
                "measurements": [
                    {
                        "real_time": 8.48528137423858e-09,
                        "cpu_time": 8.441336246629233e-08,
                        "real_time_other": 2.2405267593145244e-08,
                        "cpu_time_other": 2.5453661413660466e-08,
                        "time": 1.6404861082353634,
                        "cpu": -0.6984640740519662,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportDifferenceForPercentageAggregates(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test4_run0.json")
            testOutput2 = os.path.join(testInputs, "test4_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_lines = [["whocares", "-0.5000", "+0.5000", "0", "0", "0", "0"]]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "whocares",
                "measurements": [
                    {
                        "time": -0.5,
                        "cpu": 0.5,
                        "real_time": 0.01,
                        "real_time_other": 0.005,
                        "cpu_time": 0.10,
                        "cpu_time_other": 0.15,
                    }
                ],
                "time_unit": "ns",
                "utest": {},
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)


class TestReportSorting(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        def load_result():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput = os.path.join(testInputs, "test4_run.json")
            with open(testOutput, "r") as f:
                json = json.load(f)
            return json

        cls.json = load_result()

    def test_json_diff_report_pretty_printing(self):
        import util

        expected_names = [
            "99 family 0 instance 0 repetition 0",
            "98 family 0 instance 0 repetition 1",
            "97 family 0 instance 0 aggregate",
            "96 family 0 instance 1 repetition 0",
            "95 family 0 instance 1 repetition 1",
            "94 family 0 instance 1 aggregate",
            "93 family 1 instance 0 repetition 0",
            "92 family 1 instance 0 repetition 1",
            "91 family 1 instance 0 aggregate",
            "90 family 1 instance 1 repetition 0",
            "89 family 1 instance 1 repetition 1",
            "88 family 1 instance 1 aggregate",
        ]
        for n in range(len(self.json["benchmarks"]) ** 2):
            random.shuffle(self.json["benchmarks"])
            sorted_benchmarks = util.sort_benchmark_results(self.json)[
                "benchmarks"
            ]
            self.assertEqual(len(expected_names), len(sorted_benchmarks))
            for out, expected in zip(sorted_benchmarks, expected_names):
                self.assertEqual(out["name"], expected)


class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly2(
    unittest.TestCase
):
    @classmethod
    def setUpClass(cls):
        def load_results():
            import json

            testInputs = os.path.join(
                os.path.dirname(os.path.realpath(__file__)), "Inputs"
            )
            testOutput1 = os.path.join(testInputs, "test5_run0.json")
            testOutput2 = os.path.join(testInputs, "test5_run1.json")
            with open(testOutput1, "r") as f:
                json1 = json.load(f)
                json1["benchmarks"] = [
                    json1["benchmarks"][0] for i in range(1000)
                ]
            with open(testOutput2, "r") as f:
                json2 = json.load(f)
                json2["benchmarks"] = [
                    json2["benchmarks"][0] for i in range(1000)
                ]
            return json1, json2

        json1, json2 = load_results()
        cls.json_diff_report = get_difference_report(json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        expect_line = [
            "BM_ManyRepetitions_pvalue",
            "0.0000",
            "0.0000",
            "U",
            "Test,",
            "Repetitions:",
            "1000",
            "vs",
            "1000",
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False
        )
        output_lines = output_lines_with_header[2:]
        found = False
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(" ") if x]
            found = expect_line == parts
            if found:
                break
        self.assertTrue(found)

    def test_json_diff_report(self):
        expected_output = [
            {
                "name": "BM_ManyRepetitions",
                "label": "",
                "time_unit": "s",
                "run_type": "",
                "aggregate_name": "",
                "utest": {
                    "have_optimal_repetitions": True,
                    "cpu_pvalue": 0.0,
                    "time_pvalue": 0.0,
                    "nr_of_repetitions": 1000,
                    "nr_of_repetitions_other": 1000,
                },
            },
            {
                "name": "OVERALL_GEOMEAN",
                "label": "",
                "measurements": [
                    {
                        "real_time": 1.0,
                        "cpu_time": 1000.000000000069,
                        "real_time_other": 1000.000000000069,
                        "cpu_time_other": 1.0,
                        "time": 999.000000000069,
                        "cpu": -0.9990000000000001,
                    }
                ],
                "time_unit": "s",
                "run_type": "aggregate",
                "aggregate_name": "geomean",
                "utest": {},
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(self.json_diff_report, expected_output):
            self.assertEqual(out["name"], expected["name"])
            self.assertEqual(out["time_unit"], expected["time_unit"])
            assert_utest(self, out, expected)


def assert_utest(unittest_instance, lhs, rhs):
    if lhs["utest"]:
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["cpu_pvalue"], rhs["utest"]["cpu_pvalue"]
        )
        unittest_instance.assertAlmostEqual(
            lhs["utest"]["time_pvalue"], rhs["utest"]["time_pvalue"]
        )
        unittest_instance.assertEqual(
            lhs["utest"]["have_optimal_repetitions"],
            rhs["utest"]["have_optimal_repetitions"],
        )
    else:
        # lhs is empty. assert if rhs is not.
        unittest_instance.assertEqual(lhs["utest"], rhs["utest"])


def assert_measurements(unittest_instance, lhs, rhs):
    for m1, m2 in zip(lhs["measurements"], rhs["measurements"]):
        unittest_instance.assertEqual(m1["real_time"], m2["real_time"])
        unittest_instance.assertEqual(m1["cpu_time"], m2["cpu_time"])
        # m1['time'] and m1['cpu'] hold values which are being calculated,
        # and therefore we must use almost-equal pattern.
        unittest_instance.assertAlmostEqual(m1["time"], m2["time"], places=4)
        unittest_instance.assertAlmostEqual(m1["cpu"], m2["cpu"], places=4)


if __name__ == "__main__":
    unittest.main()

# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
# kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
# kate: indent-mode python; remove-trailing-spaces modified;