build-docker-images.py

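"""Builds (and optionally pushes) the rayproject Docker images.

Covers the base-deps, ray-deps, ray, ray-ml, and ray-worker-container images
for the configured Python and CUDA versions.
"""
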
import argparse
import datetime
import json
import functools
import glob
import itertools
import os
import re
import shutil
import subprocess
import sys
from collections import defaultdict
from typing import List, Tuple

import docker

print = functools.partial(print, file=sys.stderr, flush=True)

DOCKER_USERNAME = "raytravisbot"
DOCKER_CLIENT = None
PYTHON_WHL_VERSION = "cp3"

DOCKER_HUB_DESCRIPTION = {
    "base-deps": "Internal Image, refer to https://hub.docker.com/r/rayproject/ray",
    "ray-deps": "Internal Image, refer to https://hub.docker.com/r/rayproject/ray",
    "ray": "Official Docker Images for Ray, the distributed computing API.",
    "ray-ml": "Developer ready Docker Image for Ray.",
    "ray-worker-container": "Internal Image for CI test",
}

PY_MATRIX = {
    "py36": "3.6.12",
    "py37": "3.7.7",
    "py38": "3.8.5",
    "py39": "3.9.5",
    "py310": "3.10.4",
}

BASE_IMAGES = {
    "cu116": "nvidia/cuda:11.6.1-cudnn8-devel-ubuntu18.04",
    "cu113": "nvidia/cuda:11.3.1-cudnn8-devel-ubuntu18.04",
    "cu112": "nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04",
    "cu111": "nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04",
    "cu110": "nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04",
    "cu102": "nvidia/cuda:10.2-cudnn8-devel-ubuntu18.04",
    "cu101": "nvidia/cuda:10.1-cudnn8-devel-ubuntu18.04",
    "cpu": "ubuntu:focal",
}

CUDA_FULL = {
    "cu116": "CUDA 11.6",
    "cu113": "CUDA 11.3",
    "cu112": "CUDA 11.2",
    "cu111": "CUDA 11.1",
    "cu110": "CUDA 11.0",
    "cu102": "CUDA 10.2",
    "cu101": "CUDA 10.1",
}

# The CUDA version to use for the ML Docker image.
# If changing the CUDA version in the below line, you should also change the
# base Docker image being used in ~/.buildkite/Dockerfile.gpu to match the
# same image being used here.
ML_CUDA_VERSION = "cu112"

DEFAULT_PYTHON_VERSION = "py37"

IMAGE_NAMES = list(DOCKER_HUB_DESCRIPTION.keys())


def _get_branch():
    branch = os.environ.get("TRAVIS_BRANCH") or os.environ.get("BUILDKITE_BRANCH")
    if not branch:
        print("Branch not found!")
        print(os.environ)
        print("Environment is above ^^")
    return branch


def _release_build():
    branch = _get_branch()
    if branch is None:
        return False
    return branch != "master" and branch.startswith("releases")


def _valid_branch():
    branch = _get_branch()
    if branch is None:
        return False
    return branch == "master" or _release_build()


def _get_curr_dir():
    return os.path.dirname(os.path.realpath(__file__))


def _get_root_dir():
    return os.path.join(_get_curr_dir(), "../../")


def _get_commit_sha():
    sha = os.environ.get("TRAVIS_COMMIT") or os.environ.get("BUILDKITE_COMMIT") or ""
    if len(sha) < 6:
        print("INVALID SHA FOUND")
        return "ERROR"
    return sha[:6]


def _configure_human_version():
    global _get_branch
    global _get_commit_sha
    fake_branch_name = input(
        "Provide a 'branch name'. For releases, it should be `releases/x.x.x`"
    )
    _get_branch = lambda: fake_branch_name  # noqa: E731
    fake_sha = input("Provide a SHA (used for tag value)")
    _get_commit_sha = lambda: fake_sha  # noqa: E731


def _get_wheel_name(minor_version_number):
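    """Returns the .whl filename built for the given Python minor version.

    If minor_version_number is falsy, returns the list of all manylinux
    wheel filenames found under .whl/ instead.
    """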
    if minor_version_number:
        matches = [
            file
            for file in glob.glob(
                f"{_get_root_dir()}/.whl/ray-*{PYTHON_WHL_VERSION}"
                f"{minor_version_number}*-manylinux*"
            )
            if "+" not in file  # Exclude dbg, asan builds
        ]
        assert len(matches) == 1, (
            f"Found ({len(matches)}) matches for 'ray-*{PYTHON_WHL_VERSION}"
            f"{minor_version_number}*-manylinux*' instead of 1.\n"
            f"wheel matches: {matches}"
        )
        return os.path.basename(matches[0])
    else:
        matches = glob.glob(
            f"{_get_root_dir()}/.whl/*{PYTHON_WHL_VERSION}*-manylinux*"
        )
        return [os.path.basename(i) for i in matches]


def _check_if_docker_files_modified():
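    """Returns True if determine_tests_to_run.py reports that Docker files
    or Python dependencies are affected by the current change."""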
    stdout = subprocess.check_output(
        [
            sys.executable,
            f"{_get_curr_dir()}/../pipeline/determine_tests_to_run.py",
            "--output=json",
        ]
    )
    affected_env_var_list = json.loads(stdout)
    affected = (
        "RAY_CI_DOCKER_AFFECTED" in affected_env_var_list
        or "RAY_CI_PYTHON_DEPENDENCIES_AFFECTED" in affected_env_var_list
    )
    print(f"Docker affected: {affected}")
    return affected


def _build_docker_image(
    image_name: str, py_version: str, image_type: str, no_cache=True
):
    """Builds Docker image with the provided info.

    image_name: The name of the image to build. Must be one of
        IMAGE_NAMES.
    py_version: The Python version to build the image for.
        Must be one of PY_MATRIX.keys()
    image_type: The image type to build. Must be one of
        BASE_IMAGES.keys()
    no_cache: If True, don't use caching when building the image.
    """
    if image_name not in IMAGE_NAMES:
        raise ValueError(
            f"The provided image name {image_name} is not "
            f"recognized. Image names must be one of {IMAGE_NAMES}"
        )

    if py_version not in PY_MATRIX.keys():
        raise ValueError(
            f"The provided python version {py_version} is not "
            f"recognized. Python version must be one of"
            f" {PY_MATRIX.keys()}"
        )

    if image_type not in BASE_IMAGES.keys():
        raise ValueError(
            f"The provided CUDA version {image_type} is not "
            f"recognized. CUDA version must be one of"
            f" {BASE_IMAGES.keys()}"
        )

    # TODO(https://github.com/ray-project/ray/issues/16599):
    # remove below after supporting ray-ml images with Python 3.9+
    if image_name == "ray-ml" and py_version in {"py39", "py310"}:
        print(f"{image_name} image is currently unsupported with Python 3.9/3.10")
        return

    build_args = {}
    build_args["PYTHON_VERSION"] = PY_MATRIX[py_version]
    # I.e. "py310"[3:] == "10"
    build_args["PYTHON_MINOR_VERSION"] = py_version[3:]

    device_tag = f"{image_type}"

    if image_name == "base-deps":
        base_image = BASE_IMAGES[image_type]
    else:
        base_image = f"-{py_version}-{device_tag}"

    if image_name != "ray-worker-container":
        build_args["BASE_IMAGE"] = base_image

    if image_name in ["ray", "ray-deps", "ray-worker-container"]:
        wheel = _get_wheel_name(build_args["PYTHON_MINOR_VERSION"])
        build_args["WHEEL_PATH"] = f".whl/{wheel}"
        # Add pip option "--find-links .whl/" to ensure ray-cpp wheel
        # can be found.
        build_args["FIND_LINKS_PATH"] = ".whl"

    tagged_name = f"rayproject/{image_name}:nightly-{py_version}-{device_tag}"

    for i in range(2):
        cleanup = DOCKER_CLIENT.containers.prune().get("SpaceReclaimed")
        if cleanup is not None:
            print(f"Cleaned up {cleanup / (2 ** 20)}MB")

        labels = {
            "image-name": image_name,
            "python-version": PY_MATRIX[py_version],
            "ray-commit": _get_commit_sha(),
        }
        if image_type in CUDA_FULL:
            labels["cuda-version"] = CUDA_FULL[image_type]

        output = DOCKER_CLIENT.api.build(
            path=os.path.join(_get_root_dir(), "docker", image_name),
            tag=tagged_name,
            nocache=no_cache,
            labels=labels,
            buildargs=build_args,
        )

        cmd_output = []
        try:
            start = datetime.datetime.now()
            current_iter = start
            for line in output:
                cmd_output.append(line.decode("utf-8"))
                if datetime.datetime.now() - current_iter >= datetime.timedelta(
                    minutes=5
                ):
                    current_iter = datetime.datetime.now()
                    elapsed = datetime.datetime.now() - start
                    print(
                        f"Still building {tagged_name} after "
                        f"{elapsed.seconds} seconds"
                    )
                    if elapsed >= datetime.timedelta(minutes=15):
                        print("Additional build output:")
                        print(*cmd_output, sep="\n")
                        # Clear cmd_output after printing, so the next
                        # iteration will not print out the same lines.
                        cmd_output = []
        except Exception as e:
            print(f"FAILURE with error {e}")

        if len(DOCKER_CLIENT.api.images(tagged_name)) == 0:
            print(f"ERROR building: {tagged_name}. Output below:")
            print(*cmd_output, sep="\n")
            if i == 1:
                raise Exception("FAILED TO BUILD IMAGE")
            print("TRYING AGAIN")
        else:
            break

    print("BUILT: ", tagged_name)


def copy_wheels(human_build):
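    """Copies the wheels from .whl/ into the docker/ray, docker/ray-deps,
    and docker/ray-worker-container build contexts."""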
    if human_build:
        print(
            "Please download images using:\n"
            "`pip download --python-version <py_version> ray==<ray_version>`"
        )
    root_dir = _get_root_dir()
    wheels = _get_wheel_name(None)
    for wheel in wheels:
        source = os.path.join(root_dir, ".whl", wheel)
        ray_dst = os.path.join(root_dir, "docker/ray/.whl/")
        ray_dep_dst = os.path.join(root_dir, "docker/ray-deps/.whl/")
        ray_worker_container_dst = os.path.join(
            root_dir, "docker/ray-worker-container/.whl/"
        )
        os.makedirs(ray_dst, exist_ok=True)
        shutil.copy(source, ray_dst)
        os.makedirs(ray_dep_dst, exist_ok=True)
        shutil.copy(source, ray_dep_dst)
        os.makedirs(ray_worker_container_dst, exist_ok=True)
        shutil.copy(source, ray_worker_container_dst)


def check_staleness(repository, tag):
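    """Pulls the given image and returns True if it was created more than
    14 days ago."""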
    DOCKER_CLIENT.api.pull(repository=repository, tag=tag)
    age = DOCKER_CLIENT.api.inspect_image(f"{repository}:{tag}")["Created"]
    short_date = datetime.datetime.strptime(age.split("T")[0], "%Y-%m-%d")
    is_stale = (datetime.datetime.now() - short_date) > datetime.timedelta(days=14)
    return is_stale


def build_for_all_versions(image_name, py_versions, image_types, **kwargs):
    """Builds the given Docker image for all Python & CUDA versions"""
    for py_version in py_versions:
        for image_type in image_types:
            _build_docker_image(
                image_name, py_version=py_version, image_type=image_type, **kwargs
            )


def build_base_images(py_versions, image_types):
    build_for_all_versions("base-deps", py_versions, image_types, no_cache=False)
    build_for_all_versions("ray-deps", py_versions, image_types, no_cache=False)


def build_or_pull_base_images(
    py_versions: List[str], image_types: List[str], rebuild_base_images: bool = True
) -> bool:
    """Builds or pulls the base images; returns True if new base images were built."""
    repositories = ["rayproject/base-deps", "rayproject/ray-deps"]
    tags = [
        f"nightly-{py_version}-{image_type}"
        for py_version, image_type in itertools.product(py_versions, image_types)
    ]
    try:
        is_stale = check_staleness(repositories[0], tags[0])

        # We still pull even if we have to rebuild the base images to help with
        # caching.
        for repository in repositories:
            for tag in tags:
                DOCKER_CLIENT.api.pull(repository=repository, tag=tag)
    except Exception as e:
        print(e)
        is_stale = True

    if rebuild_base_images or _release_build() or is_stale:
        build_base_images(py_versions, image_types)
        return True
    else:
        print("Just pulling images!")
        return False


def prep_ray_ml():
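    """Copies all requirements*.txt files under python/ into the
    docker/ray-ml build context."""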
    root_dir = _get_root_dir()
    requirement_files = glob.glob(
        f"{_get_root_dir()}/python/**/requirements*.txt", recursive=True
    )
    for fl in requirement_files:
        shutil.copy(fl, os.path.join(root_dir, "docker/ray-ml/"))


def _get_docker_creds() -> Tuple[str, str]:
    docker_password = os.environ.get("DOCKER_PASSWORD")
    assert docker_password, "DOCKER_PASSWORD not set."
    return DOCKER_USERNAME, docker_password


def _docker_push(image, tag):
    print(f"PUSHING: {image}:{tag}, result:")

    # This docker API is janky. Without "stream=True" it returns a
    # massive string filled with every progress bar update, which can
    # cause CI to back up.
    #
    # With stream=True, it's a line-at-a-time generator of the same
    # info. So we can slow it down by printing every couple hundred
    # lines
    i = 0
    for progress_line in DOCKER_CLIENT.api.push(image, tag=tag, stream=True):
        if i % 100 == 0:
            print(progress_line)
        i += 1


def _tag_and_push(full_image_name, old_tag, new_tag, merge_build=False):
    # Do not tag release builds because they are no longer up to
    # date after the branch cut.
    if "nightly" in new_tag and _release_build():
        return
    if old_tag != new_tag:
        DOCKER_CLIENT.api.tag(
            image=f"{full_image_name}:{old_tag}",
            repository=full_image_name,
            tag=new_tag,
        )
    if not merge_build:
        print(
            "This is a PR Build! On a merge build, we would normally push "
            f"to: {full_image_name}:{new_tag}"
        )
    else:
        _docker_push(full_image_name, new_tag)


def _create_new_tags(all_tags, old_str, new_str):
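    """Returns a new list with old_str replaced by new_str in every tag,
    e.g. _create_new_tags(["nightly-py37-cpu"], "-cpu", "") == ["nightly-py37"]."""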
    new_tags = []
    for full_tag in all_tags:
        new_tag = full_tag.replace(old_str, new_str)
        new_tags.append(new_tag)
    return new_tags


# For non-release builds, push "nightly" & "sha"
# For release builds, push "nightly" & "latest" & "x.x.x"
def push_and_tag_images(
    py_versions: List[str],
    image_types: List[str],
    push_base_images: bool,
    merge_build: bool = False,
):
    date_tag = datetime.datetime.now().strftime("%Y-%m-%d")
    sha_tag = _get_commit_sha()
    if _release_build():
        release_name = re.search(r"[0-9]+\.[0-9]+\.[0-9].*", _get_branch()).group(0)
        date_tag = release_name
        sha_tag = release_name

    image_list = ["ray", "ray-ml"]
    if push_base_images:
        image_list.extend(["base-deps", "ray-deps"])

    for image_name in image_list:
        full_image_name = f"rayproject/{image_name}"

        # Mapping from old tags to new tags.
        # These are the tags we will push.
        # The key is the full image name, and the values are all the tags
        # for that image.
        tag_mapping = defaultdict(list)
        for py_name in py_versions:
            for image_type in image_types:
                if image_name == "ray-ml" and image_type not in [
                    ML_CUDA_VERSION,
                    "cpu",
                ]:
                    print(
                        "ML Docker image is not built for the following "
                        f"device type: {image_type}"
                    )
                    continue

                # TODO(https://github.com/ray-project/ray/issues/16599):
                # remove below after supporting ray-ml images with Python 3.9
                if image_name in ["ray-ml"] and (
                    PY_MATRIX[py_name].startswith("3.9")
                    or PY_MATRIX[py_name].startswith("3.10")
                ):
                    print(
                        f"{image_name} image is currently "
                        f"unsupported with Python 3.9/3.10"
                    )
                    continue

                tag = f"nightly-{py_name}-{image_type}"
                tag_mapping[tag].append(tag)

        # If no device is specified, it should map to CPU image.
        # For ray-ml image, if no device specified, it should map to GPU image.
        # "-gpu" tag should refer to the ML_CUDA_VERSION
        for old_tag in tag_mapping.keys():
            if "cpu" in old_tag and image_name != "ray-ml":
                new_tags = _create_new_tags(
                    tag_mapping[old_tag], old_str="-cpu", new_str=""
                )
                tag_mapping[old_tag].extend(new_tags)
            elif ML_CUDA_VERSION in old_tag:
                new_tags = _create_new_tags(
                    tag_mapping[old_tag], old_str=f"-{ML_CUDA_VERSION}", new_str="-gpu"
                )
                tag_mapping[old_tag].extend(new_tags)

                if image_name == "ray-ml":
                    new_tags = _create_new_tags(
                        tag_mapping[old_tag],
                        old_str=f"-{ML_CUDA_VERSION}",
                        new_str="",
                    )
                    tag_mapping[old_tag].extend(new_tags)

        # No Python version specified should refer to DEFAULT_PYTHON_VERSION
        for old_tag in tag_mapping.keys():
            if DEFAULT_PYTHON_VERSION in old_tag:
                new_tags = _create_new_tags(
                    tag_mapping[old_tag],
                    old_str=f"-{DEFAULT_PYTHON_VERSION}",
                    new_str="",
                )
                tag_mapping[old_tag].extend(new_tags)

        # For all tags, create Date/Sha tags
        for old_tag in tag_mapping.keys():
            new_tags = _create_new_tags(
                tag_mapping[old_tag],
                old_str="nightly",
                new_str=date_tag if "-deps" in image_name else sha_tag,
            )
            tag_mapping[old_tag].extend(new_tags)
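        # For example, on a non-release build of the "ray" image with py37/cpu,
        # the "nightly-py37-cpu" key now maps to: nightly-py37-cpu, nightly-py37,
        # nightly-cpu, nightly, {sha}-py37-cpu, {sha}-py37, {sha}-cpu and {sha}.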

        # Sanity checking.
        for old_tag in tag_mapping.keys():
            if DEFAULT_PYTHON_VERSION in old_tag:
                if "-cpu" in old_tag:
                    assert "nightly-cpu" in tag_mapping[old_tag]
                    if "-deps" in image_name:
                        assert "nightly" in tag_mapping[old_tag]
                        assert f"{date_tag}-cpu" in tag_mapping[old_tag]
                        assert f"{date_tag}" in tag_mapping[old_tag]
                    elif image_name == "ray":
                        assert "nightly" in tag_mapping[old_tag]
                        assert f"{sha_tag}-cpu" in tag_mapping[old_tag]
                        assert f"{sha_tag}" in tag_mapping[old_tag]
                    # For ray-ml, nightly should refer to the GPU image.
                    elif image_name == "ray-ml":
                        assert f"{sha_tag}-cpu" in tag_mapping[old_tag]
                    else:
                        raise RuntimeError(f"Invalid image name: {image_name}")
                elif ML_CUDA_VERSION in old_tag:
                    assert "nightly-gpu" in tag_mapping[old_tag]
                    if "-deps" in image_name:
                        assert f"{date_tag}-gpu" in tag_mapping[old_tag]
                    elif image_name == "ray":
                        assert f"{sha_tag}-gpu" in tag_mapping[old_tag]
                    # For ray-ml, nightly should refer to the GPU image.
                    elif image_name == "ray-ml":
                        assert "nightly" in tag_mapping[old_tag]
                        assert f"{sha_tag}" in tag_mapping[old_tag]
                        assert f"{sha_tag}-gpu" in tag_mapping[old_tag]
                    else:
                        raise RuntimeError(f"Invalid image name: {image_name}")

        print(f"These tags will be created for {image_name}: ", tag_mapping)

        # Tag and push all images.
        for old_tag in tag_mapping.keys():
            for new_tag in tag_mapping[old_tag]:
                _tag_and_push(
                    full_image_name,
                    old_tag=old_tag,
                    new_tag=new_tag,
                    merge_build=merge_build,
                )


# Push infra here:
# https://github.com/christian-korneck/docker-pushrm/blob/master/README-containers.md#push-a-readme-file-to-dockerhub # noqa
def push_readmes(merge_build: bool):
    if not merge_build:
        print("Not pushing README because this is a PR build.")
        return
    username, password = _get_docker_creds()
    for image, tag_line in DOCKER_HUB_DESCRIPTION.items():
        environment = {
            "DOCKER_USER": username,
            "DOCKER_PASS": password,
            "PUSHRM_FILE": f"/myvol/docker/{image}/README.md",
            "PUSHRM_DEBUG": 1,
            "PUSHRM_SHORT": tag_line,
        }
        cmd_string = f"rayproject/{image}"

        print(
            DOCKER_CLIENT.containers.run(
                "chko/docker-pushrm:1",
                command=cmd_string,
                volumes={
                    os.path.abspath(_get_root_dir()): {
                        "bind": "/myvol",
                        "mode": "rw",
                    }
                },
                environment=environment,
                remove=True,
                detach=False,
                stderr=True,
                stdout=True,
                tty=False,
            )
        )


# Build base-deps/ray-deps only on file change, 2 weeks, per release
# Build ray, ray-ml every time
# build-docker-images.py --py-versions PY37 --build-type PR --rebuild-all
MERGE = "MERGE"
HUMAN = "HUMAN"
PR = "PR"
BUILDKITE = "BUILDKITE"
LOCAL = "LOCAL"
BUILD_TYPES = [MERGE, HUMAN, PR, BUILDKITE, LOCAL]

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--py-versions",
        choices=list(PY_MATRIX.keys()),
        default="py37",
        nargs="*",
        help="Which python versions to build. "
        "Must be in (py36, py37, py38, py39, py310)",
    )
    parser.add_argument(
        "--device-types",
        choices=list(BASE_IMAGES.keys()),
        default=None,
        nargs="*",
        help="Which device types (CPU/CUDA versions) to build images for. "
        "If not specified, images will be built for all device types.",
    )
    parser.add_argument(
        "--build-type",
        choices=BUILD_TYPES,
        required=True,
        help="Whether to bypass checking if docker is affected",
    )
    parser.add_argument(
        "--build-base",
        dest="base",
        action="store_true",
        help="Whether to build base-deps & ray-deps",
    )
    parser.add_argument("--no-build-base", dest="base", action="store_false")
    parser.set_defaults(base=True)
    parser.add_argument(
        "--only-build-worker-container",
        dest="only_build_worker_container",
        action="store_true",
        help="Whether only to build ray-worker-container",
    )
    parser.set_defaults(only_build_worker_container=False)

    args = parser.parse_args()
    py_versions = args.py_versions
    py_versions = py_versions if isinstance(py_versions, list) else [py_versions]

    image_types = args.device_types if args.device_types else list(BASE_IMAGES.keys())

    assert set(list(CUDA_FULL.keys()) + ["cpu"]) == set(BASE_IMAGES.keys())

    # Make sure the python images and cuda versions we build here are
    # consistent with the ones used with fix-latest-docker.sh script.
    py_version_file = os.path.join(
        _get_root_dir(), "docker/retag-lambda", "python_versions.txt"
    )
    with open(py_version_file) as f:
        py_file_versions = f.read().splitlines()
        assert set(PY_MATRIX.keys()) == set(py_file_versions), (
            PY_MATRIX.keys(),
            py_file_versions,
        )

    cuda_version_file = os.path.join(
        _get_root_dir(), "docker/retag-lambda", "cuda_versions.txt"
    )
    with open(cuda_version_file) as f:
        cuda_file_versions = f.read().splitlines()
        assert set(BASE_IMAGES.keys()) == set(cuda_file_versions + ["cpu"]), (
            BASE_IMAGES.keys(),
            cuda_file_versions + ["cpu"],
        )

    print(
        "Building the following python versions: ",
        [PY_MATRIX[py_version] for py_version in py_versions],
    )
    print("Building images for the following devices: ", image_types)
    print("Building base images: ", args.base)

    build_type = args.build_type
    is_buildkite = build_type == BUILDKITE
    is_local = build_type == LOCAL

    if build_type == BUILDKITE:
        if os.environ.get("BUILDKITE_PULL_REQUEST", "") == "false":
            build_type = MERGE
        else:
            build_type = PR

    if build_type == HUMAN:
        # If manually triggered, request user for branch and SHA value to use.
        _configure_human_version()
    if (
        build_type in {HUMAN, MERGE, BUILDKITE, LOCAL}
        or _check_if_docker_files_modified()
        or args.only_build_worker_container
    ):
        DOCKER_CLIENT = docker.from_env()
        is_merge = build_type == MERGE
        # Buildkite is authenticated in the background.
        if is_merge and not is_buildkite and not is_local:
            # We do this here because we want to be authenticated for
            # Docker pulls as well as pushes (to avoid rate-limits).
            username, password = _get_docker_creds()
            DOCKER_CLIENT.api.login(username=username, password=password)

        copy_wheels(build_type == HUMAN)

        is_base_images_built = build_or_pull_base_images(
            py_versions, image_types, args.base
        )

        if args.only_build_worker_container:
            build_for_all_versions("ray-worker-container", py_versions, image_types)
            # TODO Currently don't push ray_worker_container
        else:
            # Build Ray Docker images.
            build_for_all_versions("ray", py_versions, image_types)

            # Only build ML Docker images for ML_CUDA_VERSION or cpu.
            ml_image_types = [
                image_type
                for image_type in image_types
                if image_type in [ML_CUDA_VERSION, "cpu"]
            ]

            if len(ml_image_types) > 0:
                prep_ray_ml()
                build_for_all_versions(
                    "ray-ml", py_versions, image_types=ml_image_types
                )

            if build_type in {MERGE, PR}:
                valid_branch = _valid_branch()
                if (not valid_branch) and is_merge:
                    print(f"Invalid Branch found: {_get_branch()}")
                push_and_tag_images(
                    py_versions,
                    image_types,
                    is_base_images_built,
                    valid_branch and is_merge,
                )

        # TODO(ilr) Re-Enable Push READMEs by using a normal password
        # (not auth token :/)
        # push_readmes(build_type is MERGE)