benchmark_utils.py

"""Utilities for measuring latency and throughput of an async callable."""
import asyncio
import time

import numpy as np


async def measure_latency_ms(async_fn, args, expected_output, num_requests=10):
    """Measure per-request latency in milliseconds over sequential requests."""
    # Warm up for 1 second before measuring.
    start = time.time()
    while time.time() - start < 1:
        await async_fn(args)

    latency_stats = []
    for _ in range(num_requests):
        # Time a single request and check that it returned the expected output.
        start = time.time()
        assert await async_fn(args) == expected_output
        end = time.time()
        latency_stats.append((end - start) * 1000)

    return latency_stats


async def measure_throughput_tps(async_fn, args, expected_output, duration_secs=10):
    """Measure throughput as requests completed per one-second window."""
    # Warm up for 1 second before measuring.
    start = time.time()
    while time.time() - start < 1:
        await async_fn(args)

    tps_stats = []
    for _ in range(duration_secs):
        # Count how many requests complete within a one-second window.
        start = time.time()
        request_completed = 0
        while time.time() - start < 1:
            assert await async_fn(args) == expected_output
            request_completed += 1
        tps_stats.append(request_completed)

    return tps_stats


async def benchmark_throughput_tps(
    async_fn,
    expected,
    duration_secs=10,
    num_clients=1,
):
    """Call deployment handle in a blocking for loop from multiple clients."""
    client_tasks = [measure_throughput_tps for _ in range(num_clients)]

    # Run one measurement coroutine per client concurrently; `0` is the
    # placeholder argument passed to `async_fn` on every request.
    throughput_stats_tps_list = await asyncio.gather(
        *[
            client_task(
                async_fn,
                0,
                expected,
                duration_secs=duration_secs,
            )
            for client_task in client_tasks
        ]
    )

    # Flatten the per-client samples and report their mean and standard deviation.
    throughput_stats_tps = []
    for client_rst in throughput_stats_tps_list:
        throughput_stats_tps.extend(client_rst)

    mean = round(np.mean(throughput_stats_tps), 2)
    std = round(np.std(throughput_stats_tps), 2)
    return mean, std


async def benchmark_latency_ms(async_fn, expected, num_requests=100, num_clients=1):
    """Call deployment handle in a blocking for loop from multiple clients."""
    client_tasks = [measure_latency_ms for _ in range(num_clients)]

    # Run one measurement coroutine per client concurrently; `0` is the
    # placeholder argument passed to `async_fn` on every request.
    latency_stats_ms_list = await asyncio.gather(
        *[
            client_task(
                async_fn,
                0,
                expected,
                num_requests=num_requests,
            )
            for client_task in client_tasks
        ]
    )

    # Flatten the per-client samples and report their mean and standard deviation.
    latency_stats_ms = []
    for client_rst in latency_stats_ms_list:
        latency_stats_ms.extend(client_rst)

    mean = round(np.mean(latency_stats_ms), 2)
    std = round(np.std(latency_stats_ms), 2)
    return mean, std
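

# Example usage: a minimal sketch of how these helpers can be driven. `echo` is a
# hypothetical async function standing in for a real deployment handle call, and
# the argument/expected values and client counts below are illustrative only.
async def _example():
    async def echo(arg):
        await asyncio.sleep(0.001)  # simulate a small amount of request work
        return arg

    latency_mean_ms, latency_std_ms = await benchmark_latency_ms(
        echo, expected=0, num_requests=50, num_clients=2
    )
    throughput_mean_tps, throughput_std_tps = await benchmark_throughput_tps(
        echo, expected=0, duration_secs=3, num_clients=2
    )
    print(f"latency: {latency_mean_ms} +/- {latency_std_ms} ms")
    print(f"throughput: {throughput_mean_tps} +/- {throughput_std_tps} tps")


if __name__ == "__main__":
    asyncio.run(_example())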