compare_perf_metrics

#!/usr/bin/env python3
import argparse
import json
import pathlib
import sys

def parse_args():
    parser = argparse.ArgumentParser(
        description="Automate the process of calculating relative change in "
        "perf_metrics. This makes catching regressions much easier."
    )
    parser.add_argument(
        "old_dir_name",
        type=str,
        help="The name of the directory containing the last release "
        "performance logs, e.g. 2.2.0",
    )
    parser.add_argument(
        "new_dir_name",
        type=str,
        help="The name of the directory containing the new release "
        "performance logs, e.g. 2.3.0",
    )
    args = parser.parse_args()
    return args

def main(old_dir_name, new_dir_name):
    old_paths = paths_without_root_dir(walk(old_dir_name))
    new_paths = paths_without_root_dir(walk(new_dir_name))
    to_compare, missing_in_new, missing_in_old = get_compare_list(old_paths, new_paths)
    # Report log files that exist in only one of the two release directories.
    for path in missing_in_new:
        print(new_dir_name, "does not have", path)
    for path in missing_in_old:
        print(old_dir_name, "does not have", path)
    throughput_regressions = []
    latency_regressions = []
    missing_metrics_in_new = []
    missing_metrics_in_old = []
    for path in to_compare:
        old_path = pathlib.Path(old_dir_name, *path.parts)
        new_path = pathlib.Path(new_dir_name, *path.parts)
        throughput, latency, new_missing, old_missing = get_regressions(old_path, new_path)
        throughput_regressions.extend(throughput)
        latency_regressions.extend(latency)
        # Keep the file path with each missing metric so the report below
        # names the log file the metric was missing from.
        missing_metrics_in_new.extend((new_path, m) for m in new_missing)
        missing_metrics_in_old.extend((old_path, m) for m in old_missing)
    for path, perf_metric in missing_metrics_in_new:
        print(f"{path} does not have {perf_metric}")
    for path, perf_metric in missing_metrics_in_old:
        print(f"{path} does not have {perf_metric}")
    # Print the worst regressions first: lowest throughput ratios,
    # highest latency ratios.
    throughput_regressions.sort()
    for _, regression in throughput_regressions:
        print(regression)
    latency_regressions.sort(reverse=True)
    for _, regression in latency_regressions:
        print(regression)

def walk(dir_name):
    # Iterative depth-first traversal that yields every file under dir_name.
    stack = [pathlib.Path(dir_name)]
    while stack:
        root = stack.pop()
        if not root.is_dir():
            yield root
        else:
            stack.extend(root.iterdir())

def paths_without_root_dir(paths):
    # Drop the leading directory component so paths from the old and new
    # release trees can be compared against each other.
    for p in paths:
        yield pathlib.Path(*p.parts[1:])

def get_compare_list(old, new):
    # Returns (present in both, only in old i.e. missing from new,
    # only in new i.e. missing from old).
    old_set = set(old)
    new_set = set(new)
    return (
        old_set.intersection(new_set),
        old_set.difference(new_set),
        new_set.difference(old_set),
    )

def get_regressions(old_path, new_path):
    with open(old_path, "r") as f:
        old = json.load(f)
    with open(new_path, "r") as f:
        new = json.load(f)

    def perf_metrics(root):
        return root["perf_metrics"]

    def types(perf_metric):
        return perf_metric["perf_metric_type"]

    def values(perf_metric):
        return perf_metric["perf_metric_value"]

    def names(perf_metric):
        return perf_metric["perf_metric_name"]

    def list_to_dict(input_list, key_selector, value_selector):
        return {key_selector(e): value_selector(e) for e in input_list}

    old_values = list_to_dict(perf_metrics(old), names, values)
    new_values = list_to_dict(perf_metrics(new), names, values)
    perf_metric_types = {
        **list_to_dict(perf_metrics(old), names, types),
        **list_to_dict(perf_metrics(new), names, types),
    }
    to_compare, missing_in_new, missing_in_old = get_compare_list(
        old_values.keys(),
        new_values.keys(),
    )
    throughput_regressions = []
    latency_regressions = []
    for perf_metric_name in to_compare:
        perf_type = perf_metric_types[perf_metric_name]
        old_value = old_values[perf_metric_name]
        new_value = new_values[perf_metric_name]
        ratio = new_value / old_value
        ratio_str = f"{100 * abs(ratio - 1):.02f}%"
        regression_message = (
            f"REGRESSION {ratio_str}: {perf_metric_name} ({perf_type}) "
            f"regresses from {old_value} to {new_value} in {new_path}"
        )
        if perf_type == "THROUGHPUT":
            # Lower throughput in the new release is a regression.
            if ratio < 1.0:
                throughput_regressions.append((ratio, regression_message))
        elif perf_type == "LATENCY":
            # Higher latency in the new release is a regression.
            if ratio > 1.0:
                latency_regressions.append((ratio, regression_message))
        else:
            raise ValueError(
                f"{perf_metric_name} has unexpected perf_metric_type {perf_type}"
            )
    return throughput_regressions, latency_regressions, missing_in_new, missing_in_old

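# A performance log consumed by get_regressions() is, as inferred from the
# lookups above, a JSON file with a top-level "perf_metrics" list whose entries
# carry perf_metric_name, perf_metric_type (THROUGHPUT or LATENCY), and
# perf_metric_value. A minimal sketch of such a file; the metric names and
# values are made-up examples, not real data:
#
# {
#   "perf_metrics": [
#     {"perf_metric_name": "example_requests_per_sec",
#      "perf_metric_type": "THROUGHPUT",
#      "perf_metric_value": 1234.5},
#     {"perf_metric_name": "example_p99_latency_ms",
#      "perf_metric_type": "LATENCY",
#      "perf_metric_value": 42.0}
#   ]
# }
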
if __name__ == "__main__":
    args = parse_args()
    sys.exit(main(args.old_dir_name, args.new_dir_name))
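
# Example invocation, assuming two directories of JSON performance logs with
# matching relative paths (the release numbers mirror the argparse help and
# are illustrative only):
#
#   ./compare_perf_metrics 2.2.0 2.3.0
#
# Files present in only one directory, metrics present in only one file, and
# any regressions (sorted worst first) are printed to stdout.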