build.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. from typing import List, Optional, Dict
  2. import boto3
  3. import hashlib
  4. import os
  5. import subprocess
  6. import sys
  7. import time
  8. from ray_release.config import RELEASE_PACKAGE_DIR
  9. from ray_release.configs.global_config import get_global_config
  10. from ray_release.logger import logger
  11. from ray_release.test import (
  12. Test,
  13. DATAPLANE_ECR_REPO,
  14. DATAPLANE_ECR_ML_REPO,
  15. )
# S3 bucket from which the dataplane build file is downloaded.
DATAPLANE_S3_BUCKET = "ray-release-automation-results"
# Name of the dataplane build file: used as the S3 object key, as the local
# download path, and as the file piped to `docker build` on stdin.
DATAPLANE_FILENAME = "dataplane_20240613.tar.gz"
# Expected SHA-256 hex digest of the downloaded dataplane build file.
DATAPLANE_DIGEST = "b7c68dd3cf9ef05b2b1518a32729fc036f06ae83bae9b9ecd241e33b37868944"
# Upper bound, in seconds, on how long to keep polling for missing ray images.
BASE_IMAGE_WAIT_TIMEOUT = 7200
# Seconds to sleep after finding that a ray image is not available yet.
BASE_IMAGE_WAIT_DURATION = 30
# Directory holding the BYOD Dockerfiles; also passed as the docker build context.
RELEASE_BYOD_DIR = os.path.join(RELEASE_PACKAGE_DIR, "ray_release/byod")
# Prefixes of the pip requirements file names; "_<python version>.txt" is
# appended when the base BYOD images are built.
REQUIREMENTS_BYOD = "requirements_byod"
REQUIREMENTS_ML_BYOD = "requirements_ml_byod"
  24. def build_champagne_image(
  25. ray_version: str,
  26. python_version: str,
  27. image_type: str,
  28. ) -> str:
  29. """
  30. Builds the Anyscale champagne image.
  31. """
  32. _download_dataplane_build_file()
  33. env = os.environ.copy()
  34. env["DOCKER_BUILDKIT"] = "1"
  35. if image_type == "cpu":
  36. ray_project = "ray"
  37. anyscale_repo = DATAPLANE_ECR_REPO
  38. else:
  39. ray_project = "ray-ml"
  40. anyscale_repo = DATAPLANE_ECR_ML_REPO
  41. ray_image = f"rayproject/{ray_project}:{ray_version}-{python_version}-{image_type}"
  42. anyscale_image = (
  43. f"{get_global_config()['byod_ecr']}/{anyscale_repo}:champagne-{ray_version}"
  44. )
  45. logger.info(f"Building champagne anyscale image from {ray_image}")
  46. with open(DATAPLANE_FILENAME, "rb") as build_file:
  47. subprocess.check_call(
  48. [
  49. "docker",
  50. "build",
  51. "--progress=plain",
  52. "--build-arg",
  53. f"BASE_IMAGE={ray_image}",
  54. "-t",
  55. anyscale_image,
  56. "-",
  57. ],
  58. stdin=build_file,
  59. stdout=sys.stderr,
  60. env=env,
  61. )
  62. _validate_and_push(anyscale_image)
  63. return anyscale_image
  64. def build_anyscale_custom_byod_image(test: Test) -> None:
  65. if not test.require_custom_byod_image():
  66. logger.info(f"Test {test.get_name()} does not require a custom byod image")
  67. return
  68. byod_image = test.get_anyscale_byod_image()
  69. if _image_exist(byod_image):
  70. logger.info(f"Image {byod_image} already exists")
  71. return
  72. env = os.environ.copy()
  73. env["DOCKER_BUILDKIT"] = "1"
  74. subprocess.check_call(
  75. [
  76. "docker",
  77. "build",
  78. "--progress=plain",
  79. "--build-arg",
  80. f"BASE_IMAGE={test.get_anyscale_base_byod_image()}",
  81. "--build-arg",
  82. f"POST_BUILD_SCRIPT={test.get_byod_post_build_script()}",
  83. "-t",
  84. byod_image,
  85. "-f",
  86. os.path.join(RELEASE_BYOD_DIR, "byod.custom.Dockerfile"),
  87. RELEASE_BYOD_DIR,
  88. ],
  89. stdout=sys.stderr,
  90. env=env,
  91. )
  92. _validate_and_push(byod_image)
  93. def build_anyscale_base_byod_images(tests: List[Test]) -> None:
  94. """
  95. Builds the Anyscale BYOD images for the given tests.
  96. """
  97. _download_dataplane_build_file()
  98. to_be_built = {}
  99. built = set()
  100. for test in tests:
  101. to_be_built[test.get_anyscale_base_byod_image()] = test
  102. env = os.environ.copy()
  103. env["DOCKER_BUILDKIT"] = "1"
  104. start = int(time.time())
  105. # ray images are built on post-merge, so we can wait for them to be available
  106. while (
  107. len(built) < len(to_be_built)
  108. and int(time.time()) - start < BASE_IMAGE_WAIT_TIMEOUT
  109. ):
  110. for byod_image, test in to_be_built.items():
  111. py_version = test.get_python_version()
  112. if test.use_byod_ml_image():
  113. byod_requirements = f"{REQUIREMENTS_ML_BYOD}_{py_version}.txt"
  114. else:
  115. byod_requirements = f"{REQUIREMENTS_BYOD}_{py_version}.txt"
  116. if _image_exist(byod_image):
  117. logger.info(f"Image {byod_image} already exists")
  118. built.add(byod_image)
  119. continue
  120. ray_image = test.get_ray_image()
  121. if not _image_exist(ray_image):
  122. # TODO(can): instead of waiting for the base image to be built, we can
  123. # build it ourselves
  124. timeout = BASE_IMAGE_WAIT_TIMEOUT - (int(time.time()) - start)
  125. logger.info(
  126. f"Image {ray_image} does not exist yet. "
  127. f"Wait for another {timeout}s..."
  128. )
  129. time.sleep(BASE_IMAGE_WAIT_DURATION)
  130. continue
  131. logger.info(f"Building {byod_image} from {ray_image}")
  132. with open(DATAPLANE_FILENAME, "rb") as build_file:
  133. subprocess.check_call(
  134. [
  135. "docker",
  136. "build",
  137. "--progress=plain",
  138. "--build-arg",
  139. f"BASE_IMAGE={ray_image}",
  140. "-t",
  141. byod_image,
  142. "-",
  143. ],
  144. stdin=build_file,
  145. stdout=sys.stderr,
  146. env=env,
  147. )
  148. subprocess.check_call(
  149. [
  150. "docker",
  151. "build",
  152. "--progress=plain",
  153. "--build-arg",
  154. f"BASE_IMAGE={byod_image}",
  155. "--build-arg",
  156. f"PIP_REQUIREMENTS={byod_requirements}",
  157. "--build-arg",
  158. "DEBIAN_REQUIREMENTS=requirements_debian_byod.txt",
  159. "-t",
  160. byod_image,
  161. "-f",
  162. os.path.join(RELEASE_BYOD_DIR, "byod.Dockerfile"),
  163. RELEASE_BYOD_DIR,
  164. ],
  165. stdout=sys.stderr,
  166. env=env,
  167. )
  168. _validate_and_push(byod_image)
  169. built.add(byod_image)
  170. def _validate_and_push(byod_image: str) -> None:
  171. """
  172. Validates the given image and pushes it to ECR.
  173. """
  174. docker_ray_commit = (
  175. subprocess.check_output(
  176. [
  177. "docker",
  178. "run",
  179. "-ti",
  180. "--entrypoint",
  181. "python",
  182. byod_image,
  183. "-c",
  184. "import ray; print(ray.__commit__)",
  185. ],
  186. )
  187. .decode("utf-8")
  188. .strip()
  189. )
  190. if os.environ.get("RAY_IMAGE_TAG"):
  191. logger.info(f"Ray commit from image: {docker_ray_commit}")
  192. else:
  193. expected_ray_commit = _get_ray_commit()
  194. assert (
  195. docker_ray_commit == expected_ray_commit
  196. ), f"Expected ray commit {expected_ray_commit}, found {docker_ray_commit}"
  197. logger.info(f"Pushing image to registry: {byod_image}")
  198. subprocess.check_call(
  199. ["docker", "push", byod_image],
  200. stdout=sys.stderr,
  201. )
  202. def _get_ray_commit(envs: Optional[Dict[str, str]] = None) -> str:
  203. if envs is None:
  204. envs = os.environ
  205. for key in [
  206. "RAY_WANT_COMMIT_IN_IMAGE",
  207. "COMMIT_TO_TEST",
  208. "BUILDKITE_COMMIT",
  209. ]:
  210. commit = envs.get(key, "")
  211. if commit:
  212. return commit
  213. return ""
  214. def _download_dataplane_build_file() -> None:
  215. """
  216. Downloads the dataplane build file from S3.
  217. """
  218. s3 = boto3.client("s3")
  219. s3.download_file(
  220. Bucket=DATAPLANE_S3_BUCKET,
  221. Key=DATAPLANE_FILENAME,
  222. Filename=DATAPLANE_FILENAME,
  223. )
  224. with open(DATAPLANE_FILENAME, "rb") as build_context:
  225. digest = hashlib.sha256(build_context.read()).hexdigest()
  226. assert digest == DATAPLANE_DIGEST, "Mismatched dataplane digest found!"
  227. def _image_exist(image: str) -> bool:
  228. """
  229. Checks if the given image exists in Docker
  230. """
  231. p = subprocess.run(
  232. ["docker", "manifest", "inspect", image],
  233. stdout=sys.stderr,
  234. stderr=sys.stderr,
  235. )
  236. return p.returncode == 0