123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- import argparse
- import asyncio
- import logging
- import logging.handlers
- import platform
- import traceback
- import ray.dashboard.consts as dashboard_consts
- import ray.dashboard.head as dashboard_head
- import ray.dashboard.utils as dashboard_utils
- import ray.ray_constants as ray_constants
- import ray._private.gcs_utils as gcs_utils
- import ray._private.services
- import ray._private.utils
- from ray._private.gcs_pubsub import gcs_pubsub_enabled, GcsPublisher
- from ray._private.ray_logging import setup_component_logger
# Logger for this module, named after the module through __name__. It is
# not configured here: it should be configured at the entry point into the
# program using Ray. Ray provides a default configuration at its
# entry/init points.
logger = logging.getLogger(__name__)
class Dashboard:
    """Dashboard process used to monitor the nodes of a Ray cluster.

    The dashboard consists of a REST API, which collates the data published
    by the Reporter processes running on the nodes into a json structure,
    and a webserver that polls that API for display purposes.

    Every constructor argument is handed over unchanged to
    ``dashboard_head.DashboardHead``; the resulting object is kept on
    ``self.dashboard_head``.

    Args:
        host(str): Host address of the dashboard aiohttp server.
        port(int): Port number of the dashboard aiohttp server.
        port_retries(int): How many times to retry selecting a valid port.
        gcs_address(str): GCS address of the cluster.
        redis_address(str): Redis address of the Ray cluster.
        redis_password(str): Redis password to access GCS.
        log_dir(str): Log directory of the dashboard.
        temp_dir(str): Temporary directory used by the Ray process.
        minimal(bool): Whether to run the minimal dashboard; passed to
            DashboardHead as ``minimal``.
    """

    def __init__(
        self,
        host,
        port,
        port_retries,
        gcs_address,
        redis_address,
        redis_password=None,
        log_dir=None,
        temp_dir=None,
        minimal=False,
    ):
        # Map the constructor arguments onto the keyword names that
        # DashboardHead expects (the HTTP-related ones carry an "http_"
        # prefix there).
        head_options = {
            "http_host": host,
            "http_port": port,
            "http_port_retries": port_retries,
            "gcs_address": gcs_address,
            "redis_address": redis_address,
            "redis_password": redis_password,
            "log_dir": log_dir,
            "temp_dir": temp_dir,
            "minimal": minimal,
        }
        self.dashboard_head = dashboard_head.DashboardHead(**head_options)

    async def run(self):
        """Await the wrapped dashboard head's ``run()`` coroutine."""
        head = self.dashboard_head
        await head.run()
if __name__ == "__main__":
    # Script entry point: parse the command line, configure logging, run the
    # dashboard on the asyncio event loop, and report failures.

    # ---- Command-line interface -------------------------------------------
    parser = argparse.ArgumentParser(description="Ray dashboard.")
    parser.add_argument(
        "--host", required=True, type=str, help="The host to use for the HTTP server."
    )
    parser.add_argument(
        "--port", required=True, type=int, help="The port to use for the HTTP server."
    )
    parser.add_argument(
        "--port-retries",
        required=False,
        type=int,
        default=0,
        help="The retry times to select a valid port.",
    )
    parser.add_argument(
        "--gcs-address", required=False, type=str, help="The address (ip:port) of GCS."
    )
    parser.add_argument(
        "--redis-address", required=True, type=str, help="The address to use for Redis."
    )
    parser.add_argument(
        "--redis-password",
        required=False,
        type=str,
        default=None,
        help="The password to use for Redis",
    )
    parser.add_argument(
        "--logging-level",
        required=False,
        # Accept the level name case-insensitively and convert it with
        # logging.getLevelName before it is checked against `choices`.
        type=lambda s: logging.getLevelName(s.upper()),
        default=ray_constants.LOGGER_LEVEL,
        choices=ray_constants.LOGGER_LEVEL_CHOICES,
        help=ray_constants.LOGGER_LEVEL_HELP,
    )
    parser.add_argument(
        "--logging-format",
        required=False,
        type=str,
        default=ray_constants.LOGGER_FORMAT,
        help=ray_constants.LOGGER_FORMAT_HELP,
    )
    parser.add_argument(
        "--logging-filename",
        required=False,
        type=str,
        default=dashboard_consts.DASHBOARD_LOG_FILENAME,
        help="Specify the name of log file, "
        'log to stdout if set empty, default is "{}"'.format(
            dashboard_consts.DASHBOARD_LOG_FILENAME
        ),
    )
    parser.add_argument(
        "--logging-rotate-bytes",
        required=False,
        type=int,
        default=ray_constants.LOGGING_ROTATE_BYTES,
        help="Specify the max bytes for rotating "
        "log file, default is {} bytes.".format(ray_constants.LOGGING_ROTATE_BYTES),
    )
    parser.add_argument(
        "--logging-rotate-backup-count",
        required=False,
        type=int,
        default=ray_constants.LOGGING_ROTATE_BACKUP_COUNT,
        help="Specify the backup count of rotated log file, default is {}.".format(
            ray_constants.LOGGING_ROTATE_BACKUP_COUNT
        ),
    )
    parser.add_argument(
        "--log-dir",
        required=True,
        type=str,
        default=None,
        help="Specify the path of log directory.",
    )
    parser.add_argument(
        "--temp-dir",
        required=True,
        type=str,
        default=None,
        help="Specify the path of the temporary directory use by Ray process.",
    )
    parser.add_argument(
        "--minimal",
        action="store_true",
        help=(
            "Minimal dashboard only contains a subset of features that don't "
            "require additional dependencies installed when ray is installed "
            "by `pip install ray[default]`."
        ),
    )

    args = parser.parse_args()

    # Keep only the bootstrap address that is actually in use: with GCS
    # bootstrap the Redis settings are dropped, otherwise the GCS address is.
    if gcs_utils.use_gcs_for_bootstrap():
        args.redis_address = None
        args.redis_password = None
    else:
        args.gcs_address = None

    # ---- Run the dashboard ------------------------------------------------
    try:
        setup_component_logger(
            logging_level=args.logging_level,
            logging_format=args.logging_format,
            log_dir=args.log_dir,
            filename=args.logging_filename,
            max_bytes=args.logging_rotate_bytes,
            backup_count=args.logging_rotate_backup_count,
        )

        dashboard = Dashboard(
            args.host,
            args.port,
            args.port_retries,
            args.gcs_address,
            args.redis_address,
            redis_password=args.redis_password,
            log_dir=args.log_dir,
            temp_dir=args.temp_dir,
            minimal=args.minimal,
        )
        loop = asyncio.get_event_loop()
        loop.run_until_complete(dashboard.run())
    except Exception as e:
        traceback_str = ray._private.utils.format_error_message(traceback.format_exc())
        message = (
            f"The dashboard on node {platform.uname()[1]} "
            f"failed with the following "
            f"error:\n{traceback_str}"
        )
        if isinstance(e, dashboard_utils.FrontendNotFoundError):
            # A missing frontend is only a warning; execution continues to
            # the driver notification below.
            logger.warning(message)
        else:
            # Any other failure is logged as an error and propagated; the
            # bare `raise` re-raises the active exception unchanged.
            logger.error(message)
            raise

        # Something went wrong, so push an error to all drivers.
        redis_client = None
        gcs_publisher = None
        if gcs_pubsub_enabled():
            if gcs_utils.use_gcs_for_bootstrap():
                gcs_publisher = GcsPublisher(args.gcs_address)
            else:
                # The Redis client is needed only to look up the GCS
                # address; it is cleared afterwards so that the error is
                # published through the GCS publisher alone.
                redis_client = ray._private.services.create_redis_client(
                    args.redis_address, password=args.redis_password
                )
                gcs_publisher = GcsPublisher(
                    address=gcs_utils.get_gcs_address_from_redis(redis_client)
                )
                redis_client = None
        else:
            redis_client = ray._private.services.create_redis_client(
                args.redis_address, password=args.redis_password
            )

        # The error type and the message are the positional arguments; the
        # clients are passed by keyword only. The previous call additionally
        # passed `redis_client` as a leading positional argument, which
        # shifted the error type and message into the wrong parameters.
        # NOTE(review): relies on publish_error_to_driver(error_type,
        # message, job_id=None, redis_client=None, gcs_publisher=None) --
        # confirm against ray._private.utils.
        ray._private.utils.publish_error_to_driver(
            ray_constants.DASHBOARD_DIED_ERROR,
            message,
            redis_client=redis_client,
            gcs_publisher=gcs_publisher,
        )
|