# update_cache_env.py
import os
import pickle
import time
from datetime import datetime
from typing import List

import click
from sphinx.project import Project
  8. PENDING_FILES_PATH = "pending_files.txt"
  9. ENVIRONMENT_PICKLE = "_build/doctrees/environment.pickle"
  10. def list_pending_files(ray_dir: str) -> List[str]:
  11. """List all files that are added/modified in git repo."""
  12. pending_files = []
  13. with open(f"{ray_dir}/{PENDING_FILES_PATH}", "r") as f:
  14. pending_files = f.readlines()
  15. pending_files = [file.strip() for file in pending_files]
  16. os.remove(f"{ray_dir}/{PENDING_FILES_PATH}")
  17. for i in range(len(pending_files)):
  18. if pending_files[i].split(".")[-1] != "py":
  19. pending_files[i] = pending_files[i].split(".")[0]
  20. return pending_files
  21. def update_environment_pickle(ray_dir: str, pending_files: List[str]) -> None:
  22. """
  23. Update the environment pickle file with
  24. new source and doctree directory, and modify source file timestamps.
  25. """
  26. ray_doc_dir = os.path.join(ray_dir, "doc")
  27. with open(os.path.join(ray_doc_dir, ENVIRONMENT_PICKLE), "rb+") as f:
  28. env = pickle.load(f)
  29. # Update cache's environment source and doctree directory to the host path
  30. env.srcdir = os.path.join(ray_doc_dir, "source")
  31. env.doctreedir = os.path.join(ray_doc_dir, "_build/doctrees")
  32. env.project.srcdir = os.path.join(ray_doc_dir, "source")
  33. p = Project(
  34. os.path.join(ray_doc_dir, "source"),
  35. {".rst": "restructuredtext", ".md": "myst-nb", ".ipynb": "myst-nb"},
  36. )
  37. p.discover()
  38. env.project = p
  39. # all_docs is a map of source doc name -> last modified timestamp
  40. # Update timestamp of all docs, except the pending ones
  41. # to a later timestamp so they are not marked outdated and rebuilt.
  42. for doc, val in env.all_docs.items():
  43. if doc not in pending_files:
  44. env.all_docs[doc] = int(time.time()) * 1000000
  45. # Write the updated environment pickle file back
  46. with open(
  47. os.path.join(ray_doc_dir, "_build/doctrees/environment.pickle"), "wb+"
  48. ) as f:
  49. pickle.dump(env, f, pickle.HIGHEST_PROTOCOL)
  50. # TODO(@khluu): Check if this is necessary. Only update changed template files.
  51. def update_file_timestamp(ray_dir: str, pending_files: List[str]) -> None:
  52. """
  53. Update files other than source files to
  54. an old timestamp to avoid rebuilding them.
  55. """
  56. ray_doc_dir = os.path.join(ray_dir, "doc")
  57. # Update all target html files timestamp to the current time
  58. new_timestamp = datetime.now().timestamp()
  59. directory = f"{ray_doc_dir}/_build/html/"
  60. for root, dirs, files in os.walk(directory):
  61. for file in files:
  62. file_path = os.path.join(root, file)
  63. try:
  64. # Change the access and modification times
  65. os.utime(file_path, (new_timestamp, new_timestamp))
  66. except Exception as e:
  67. print(f"Failed to change timestamp for {file_path}: {str(e)}")
  68. # Update Makefile timestamp
  69. os.utime(f"{ray_doc_dir}/Makefile", (new_timestamp, new_timestamp))
  70. new_timestamp = datetime.now().timestamp()
  71. for file in pending_files:
  72. if file.split(".")[-1] != "py":
  73. continue
  74. file_path = os.path.join(ray_dir, file)
  75. try:
  76. # Change the access and modification times
  77. os.utime(file_path, (new_timestamp, new_timestamp))
  78. except Exception as e:
  79. print(f"Failed to change timestamp for {file_path}: {str(e)}")
  80. print("Timestamp change operation completed.")
  81. @click.command()
  82. @click.option("--ray-dir", required=True, type=str, help="Path to the Ray repository.")
  83. def main(ray_dir: str) -> None:
  84. if not os.path.exists(f"{ray_dir}/{PENDING_FILES_PATH}"):
  85. print("Global cache was not loaded. Skip updating cache environment.")
  86. return
  87. print("Updating cache environment ...")
  88. pending_files = list_pending_files(ray_dir)
  89. update_environment_pickle(ray_dir, pending_files)
  90. update_file_timestamp(ray_dir, pending_files)
  91. if __name__ == "__main__":
  92. main()