test.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287
  1. import enum
  2. import os
  3. import json
  4. import time
  5. from typing import Optional, List, Dict
  6. from dataclasses import dataclass
  7. import boto3
  8. from botocore.exceptions import ClientError
  9. from ray_release.result import (
  10. ResultStatus,
  11. Result,
  12. )
  13. from ray_release.logger import logger
  # S3 bucket and key prefixes under which test objects and their per-run
  # results are stored.
  AWS_BUCKET = "ray-ci-results"
  AWS_TEST_KEY = "ray_tests"
  AWS_TEST_RESULT_KEY = "ray_test_results"

  # Python version used when a test does not specify one, as a tuple of ints.
  # Parsed from the RELEASE_PY environment variable ("3.7" when unset).
  DEFAULT_PYTHON_VERSION = tuple(
      map(int, os.environ.get("RELEASE_PY", "3.7").split("."))
  )

  # Container registry host and repositories used to build BYOD image names.
  DATAPLANE_ECR = "029272617770.dkr.ecr.us-west-2.amazonaws.com"
  DATAPLANE_ECR_REPO = "anyscale/ray"
  DATAPLANE_ECR_ML_REPO = "anyscale/ray-ml"
  def _convert_env_list_to_dict(env_list: List[str]) -> Dict[str, str]:
      """
      Turn a list of environment entries into a name -> value mapping.

      Each entry is either "NAME=VALUE" (split on the first "=") or a bare
      "NAME". A bare name takes its value from the current process
      environment, or the empty string when the variable is not set.
      """
      resolved = {}
      for entry in env_list:
          name, separator, value = entry.partition("=")
          if separator:
              resolved[name] = value
          else:
              # No "=" present: inherit the value from our own environment.
              resolved[entry] = os.environ.get(entry, "")
      return resolved
  class TestState(enum.Enum):
      """
      Overall state of the test
      """

      JAILED = "jailed"
      FAILING = "failing"
      # The misspelled name is kept as the canonical member so existing
      # references keep working.
      CONSITENTLY_FAILING = "consistently_failing"
      # Correctly spelled alias. It shares the value above, so enum treats it
      # as another name for the same member; it does not appear in iteration
      # and lookups by value still return CONSITENTLY_FAILING.
      CONSISTENTLY_FAILING = "consistently_failing"
      PASSING = "passing"
  @dataclass
  class TestResult:
      """Record of a single test run: its status, commit, URL and time."""

      status: str
      commit: str
      url: str
      # Milliseconds since the epoch when created through from_result().
      timestamp: int

      @classmethod
      def from_result(cls, result: Result):
          """
          Build a record from a run result.

          The commit comes from the BUILDKITE_COMMIT environment variable
          (empty string when unset) and the timestamp is the current time in
          milliseconds.
          """
          run_status = result.status
          commit_sha = os.environ.get("BUILDKITE_COMMIT", "")
          build_url = result.buildkite_url
          now_ms = int(time.time() * 1000)
          return cls(
              status=run_status,
              commit=commit_sha,
              url=build_url,
              timestamp=now_ms,
          )

      @classmethod
      def from_dict(cls, result: dict):
          """
          Build a record from a dict holding the keys "status", "commit",
          "url" and "timestamp". A missing key raises KeyError.
          """
          field_names = ("status", "commit", "url", "timestamp")
          return cls(**{name: result[name] for name in field_names})

      def is_failing(self) -> bool:
          """Return True when the run did not pass."""
          passed = self.is_passing()
          return not passed

      def is_passing(self) -> bool:
          """Return True when the status equals the SUCCESS result status."""
          success_value = ResultStatus.SUCCESS.value
          return self.status == success_value
  class Test(dict):
      """
      A test to run on buildkite.

      The object is a plain dict of the test's fields (the accessors below
      read "name", "team", "cluster", "python" and "state") with helpers on
      top to build image names and to load/persist the test and its results
      in S3.
      """

      KEY_GITHUB_ISSUE_NUMBER = "github_issue_number"
      KEY_BISECT_BUILD_NUMBER = "bisect_build_number"
      KEY_BISECT_BLAMED_COMMIT = "bisect_blamed_commit"

      def __init__(self, *args, **kwargs):
          super().__init__(*args, **kwargs)
          # Cache filled by get_test_results(); None means "not fetched yet".
          # It is an attribute rather than a dict item, so persist_to_s3()
          # (which dumps the dict) does not serialize it.
          self.test_results = None

      @staticmethod
      def _env_with_fallback(primary: str, fallback: str) -> str:
          """
          Return the value of env var ``primary`` when it is set, otherwise the
          value of env var ``fallback``.

          The fallback is only looked up when needed, so a missing fallback
          variable is not an error as long as the primary one is set.

          :raises KeyError: if neither variable is set
          """
          value = os.environ.get(primary)
          if value is None:
              value = os.environ[fallback]
          return value

      def is_byod_cluster(self) -> bool:
          """
          Returns whether this test is running on a BYOD cluster, i.e. whether
          its "cluster" section has a "byod" entry.
          """
          return self["cluster"].get("byod") is not None

      def get_byod_type(self) -> Optional[str]:
          """
          Returns the type of the BYOD cluster ("cpu" when not specified), or
          None when this is not a BYOD test.
          """
          if not self.is_byod_cluster():
              return None
          return self["cluster"]["byod"].get("type", "cpu")

      def get_byod_pre_run_cmds(self) -> List[str]:
          """
          Returns the list of pre-run commands for the BYOD cluster; empty when
          none are configured or this is not a BYOD test.
          """
          if not self.is_byod_cluster():
              return []
          return self["cluster"]["byod"].get("pre_run_cmds", [])

      def get_byod_runtime_env(self) -> Dict[str, str]:
          """
          Returns the runtime environment variables for the BYOD cluster as a
          name -> value dict; empty when this is not a BYOD test.
          """
          if not self.is_byod_cluster():
              return {}
          return _convert_env_list_to_dict(
              self["cluster"]["byod"].get("runtime_env", [])
          )

      def get_name(self) -> str:
          """
          Returns the name of the test.
          """
          return self["name"]

      def get_oncall(self) -> str:
          """
          Returns the oncall for the test (the "team" field).
          """
          return self["team"]

      def update_from_s3(self) -> None:
          """
          Update test object with data field from s3.

          Best effort: when the S3 read fails with a ClientError, a warning is
          logged and the test is left unchanged.
          """
          try:
              data = (
                  boto3.client("s3")
                  .get_object(
                      Bucket=AWS_BUCKET,
                      Key=f"{AWS_TEST_KEY}/{self.get_name()}.json",
                  )
                  .get("Body")
                  .read()
                  .decode("utf-8")
              )
          except ClientError as e:
              logger.warning(f"Failed to update data for {self.get_name()} from s3: {e}")
              return
          self.update(json.loads(data))

      def get_state(self) -> TestState:
          """
          Returns the state of the test; PASSING when no state is recorded.
          """
          return TestState(self.get("state", TestState.PASSING.value))

      def set_state(self, state: TestState) -> None:
          """
          Sets the state of the test (stored as the enum's string value).
          """
          self["state"] = state.value

      def get_python_version(self) -> str:
          """
          Returns the python version to use for this test. If not specified, use
          the default python version.
          """
          return self.get("python", ".".join(str(v) for v in DEFAULT_PYTHON_VERSION))

      def get_byod_image_tag(self) -> str:
          """
          Returns the byod image tag to use for this test.

          The tag is "<ray_version>-py<XY>[-gpu]", where ray_version is the
          first 6 characters of the commit, prefixed with "<release>." on a
          "releases/<release>" branch. The commit is read from COMMIT_TO_TEST,
          falling back to BUILDKITE_COMMIT; the branch from BRANCH_TO_TEST,
          falling back to BUILDKITE_BRANCH.

          :raises KeyError: if neither the override nor the buildkite variable
              is set for the commit or the branch
          :raises AssertionError: if the branch is neither "master" nor a
              "releases/" branch
          """
          # Look the buildkite variables up lazily: passing os.environ[...] as
          # the default of .get() would raise KeyError even when the override
          # variable is set.
          commit = self._env_with_fallback("COMMIT_TO_TEST", "BUILDKITE_COMMIT")
          branch = self._env_with_fallback("BRANCH_TO_TEST", "BUILDKITE_BRANCH")
          ray_version = commit[:6]
          assert branch == "master" or branch.startswith(
              "releases/"
          ), f"Invalid branch name {branch}"
          if branch.startswith("releases/"):
              release_name = branch[len("releases/") :]
              ray_version = f"{release_name}.{ray_version}"
          image_suffix = "-gpu" if self.get_byod_type() == "gpu" else ""
          python_version = f"py{self.get_python_version().replace('.', '')}"
          return f"{ray_version}-{python_version}{image_suffix}"

      def get_byod_repo(self) -> str:
          """
          Returns the byod repo to use for this test: the ML repo for "gpu"
          tests, the plain repo otherwise.
          """
          return (
              DATAPLANE_ECR_ML_REPO
              if self.get_byod_type() == "gpu"
              else DATAPLANE_ECR_REPO
          )

      def get_ray_image(self) -> str:
          """
          Returns the ray docker image to use for this test.
          """
          ray_project = "ray-ml" if self.get_byod_type() == "gpu" else "ray"
          return f"rayproject/{ray_project}:{self.get_byod_image_tag()}"

      def get_anyscale_byod_image(self) -> str:
          """
          Returns the anyscale byod image to use for this test.
          """
          return f"{DATAPLANE_ECR}/{self.get_byod_repo()}:{self.get_byod_image_tag()}"

      def get_test_results(
          self, limit: int = 10, refresh: bool = False
      ) -> List[TestResult]:
          """
          Get test result from test object, or s3

          Results are ordered newest first by the objects' last-modified time.
          Once fetched they are cached on the object; the cached list is
          returned as-is (regardless of ``limit``) until ``refresh`` is True.

          :param limit: limit of test results to return
          :param refresh: whether to refresh the test results from s3
          """
          if self.test_results is not None and not refresh:
              return self.test_results

          s3_client = boto3.client("s3")
          # A single list_objects_v2 call returns a bounded number of keys, so
          # page through the whole listing before picking the newest ones.
          pages = s3_client.get_paginator("list_objects_v2").paginate(
              Bucket=AWS_BUCKET,
              Prefix=f"{AWS_TEST_RESULT_KEY}/{self.get_name()}-",
          )
          files = sorted(
              (obj for page in pages for obj in page.get("Contents", [])),
              # Compare the datetimes directly; strftime("%s") is a
              # platform-specific directive and ignores the timezone.
              key=lambda obj: obj["LastModified"],
              reverse=True,
          )[:limit]
          self.test_results = [
              TestResult.from_dict(
                  json.loads(
                      s3_client.get_object(
                          Bucket=AWS_BUCKET,
                          Key=file["Key"],
                      )
                      .get("Body")
                      .read()
                      .decode("utf-8")
                  )
              )
              for file in files
          ]
          return self.test_results

      def persist_result_to_s3(self, result: Result) -> None:
          """
          Persist test result object to s3

          The result is written as JSON under a key made of the test name and
          the current time in milliseconds. Nothing is returned; a failed
          upload surfaces as the exception raised by the S3 client.
          """
          boto3.client("s3").put_object(
              Bucket=AWS_BUCKET,
              Key=f"{AWS_TEST_RESULT_KEY}/"
              f"{self.get_name()}-{int(time.time() * 1000)}.json",
              Body=json.dumps(TestResult.from_result(result).__dict__),
          )

      def persist_to_s3(self) -> None:
          """
          Persist test object to s3

          The dict content of the test is written as JSON under a key made of
          the test name. Nothing is returned; a failed upload surfaces as the
          exception raised by the S3 client.
          """
          boto3.client("s3").put_object(
              Bucket=AWS_BUCKET,
              Key=f"{AWS_TEST_KEY}/{self.get_name()}.json",
              Body=json.dumps(self),
          )
  class TestDefinition(dict):
      """
      The definition of a test (test name, group, etc.), held as a plain dict.

      Unlike the test class, a definition can carry additional fields, for
      example variations, which can be used to define several variations of a
      test.
      """