# ConfigMap carrying the Locust load-test script for this cluster.
# NOTE(review): the locustfile placeholder sits at column 0 inside a literal
# block scalar, so the renderer presumably indents every line of the
# substituted text itself -- confirm against the code that fills this template.
kind: ConfigMap
apiVersion: v1
metadata:
  name: locusttest-{cluster_id}
data:
  locustfile.py: |
{locustfile}
---
# ConfigMap carrying solution.py, the Serve application module. The Ray head
# and worker pods mount it at /tmp/testing/solution.py, and the RayService
# imports solution.serve_entrypoint from it.
# NOTE(review): the solution placeholder sits at column 0 inside a literal
# block scalar, so the renderer presumably indents every line of the
# substituted text itself -- confirm against the code that fills this template.
kind: ConfigMap
apiVersion: v1
metadata:
  name: script-{cluster_id}
data:
  solution.py: |
{solution}
---
# Redis configuration file; the Redis Deployment mounts the redis.conf key at
# /usr/local/etc/redis/redis.conf and starts redis-server with it.
# NOTE(review): requirepass is stored in plain text in a ConfigMap and
# protected-mode is off -- presumably acceptable only because this is a
# throwaway test cluster; confirm before reusing elsewhere.
kind: ConfigMap
apiVersion: v1
metadata:
  name: redis-config-{cluster_id}
  labels:
    app: redis
data:
  # Everything below the key is the literal file content (no trailing newline).
  redis.conf: |-
    dir /data
    port 6379
    bind 0.0.0.0
    appendonly yes
    protected-mode no
    requirepass 5241590000000000
    pidfile /data/redis-6379.pid
---
# In-cluster endpoint for Redis. The Ray head container points
# RAY_REDIS_ADDRESS at this Service name on port 6379.
# NOTE(review): the selector is only "app: redis" and does not include the
# cluster id, so it would also match Redis pods rendered from this template
# for another cluster in the same namespace -- confirm one cluster per
# namespace is guaranteed.
apiVersion: v1
kind: Service
metadata:
  name: redis-{cluster_id}
  labels:
    app: redis
spec:
  type: ClusterIP
  ports:
    - name: redis
      port: 6379
  selector:
    app: redis
---
# Single-replica Redis server configured from the redis-config ConfigMap.
# No volume is mounted at /data (the dir set in redis.conf); only the config
# file is mounted, so append-only and pid files are written inside the
# container itself.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: redis-{cluster_id}
  labels:
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis
  template:
    metadata:
      labels:
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:5.0.8
          # Start the server with the mounted configuration file.
          command:
            - "sh"
            - "-c"
            - "redis-server /usr/local/etc/redis/redis.conf"
          ports:
            - containerPort: 6379
          volumeMounts:
            # subPath mounts just the redis.conf key as a single file.
            - name: config
              mountPath: /usr/local/etc/redis/redis.conf
              subPath: redis.conf
      volumes:
        - name: config
          configMap:
            name: redis-config-{cluster_id}
---
# RayService under test: a Serve application (serveConfig) plus the Ray
# cluster that hosts it (rayClusterConfig: one head pod, twelve worker pods).
apiVersion: ray.io/v1alpha1
kind: RayService
metadata:
  name: service-{cluster_id}
  annotations:
    # Annotation values must be strings, hence the quotes.
    ray.io/ft-enabled: "true"
spec:
  serviceUnhealthySecondThreshold: 300
  deploymentUnhealthySecondThreshold: 300
  serveConfig:
    # Entrypoint inside solution.py, which is mounted under /tmp/testing/.
    importPath: solution.serve_entrypoint
    # runtimeEnv is a YAML document embedded as a string; PYTHONPATH points at
    # the directory where solution.py is mounted.
    runtimeEnv: |
      env_vars:
        PYTHONPATH: "/tmp/testing/"
    # Six deployments, each with six single-CPU replicas.
    deployments:
      - name: a
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
      - name: b
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
      - name: c
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
      - name: d
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
      - name: e
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
      - name: DAGDriver
        numReplicas: 6
        rayActorOptions:
          numCpus: 1
  rayClusterConfig:
    # Should match the Ray version in the image of the containers.
    rayVersion: '3.0.0.dev0'
    ###################### headGroupSpec ######################
    # Head group template and specs.
    headGroupSpec:
      # Kubernetes Service type; valid values are 'ClusterIP', 'NodePort' and
      # 'LoadBalancer'.
      serviceType: ClusterIP
      # Number of head pods (assuming there could be more than one in the
      # future).
      replicas: 1
      # rayNodeType is not needed here since this is the head group.
      # The following params complete the head's start command:
      #   ray start --head --block --redis-port=6379 ...
      rayStartParams:
        port: '6379'  # should match the container port named gcs-server
        object-store-memory: '100000000'
        dashboard-host: '0.0.0.0'
        num-cpus: '0'  # can be auto-completed from the limits
        node-ip-address: $MY_POD_IP  # auto-completed as the head pod IP
        block: 'true'
      # Pod template for the head.
      template:
        metadata:
          labels:
            rayCluster: cluster-{cluster_id}
            # Will be injected if missing; must be head or worker.
            rayNodeType: head
            # Will be injected if missing.
            groupName: headgroup
          # Annotations for the pod.
          annotations:
            key: value
        spec:
          volumes:
            - name: script
              configMap:
                name: script-{cluster_id}
            # The doubled braces are an escaped empty mapping and must stay
            # doubled in this template.
            - name: log-volume
              emptyDir: {{}}
          containers:
            - name: ray-head
              image: '{ray_image}'
              imagePullPolicy: Always
              env:
                - name: MY_POD_IP
                  valueFrom:
                    fieldRef:
                      fieldPath: status.podIP
                # Points at the Redis Service rendered from this template.
                - name: RAY_REDIS_ADDRESS
                  value: redis-{cluster_id}:6379
                - name: RAY_gcs_rpc_server_reconnect_timeout_s
                  value: "600"
                - name: SERVE_DEPLOYMENT_HANDLE_IS_SYNC
                  value: "1"
              resources:
                limits:
                  cpu: "2"
                requests:
                  cpu: "2"
              ports:
                - containerPort: 6379
                  name: gcs-server
                - containerPort: 8265  # Ray dashboard
                  name: dashboard
                - containerPort: 10001
                  name: client
                - containerPort: 8000
                  name: serve
              volumeMounts:
                # subPath mounts just the solution.py key as a single file.
                - name: script
                  mountPath: /tmp/testing/solution.py
                  subPath: solution.py
                - mountPath: /tmp/ray/
                  name: log-volume
    workerGroupSpecs:
      # Number of worker pods in this group; min and max are pinned to the
      # same value.
      - replicas: 12
        minReplicas: 12
        maxReplicas: 12
        # Logical group name; could also be a functional name.
        groupName: small-group
        # To add worker pods, increment replicas. To remove worker pods,
        # decrement replicas and list the pods under workersToDelete; the
        # operator removes pods from that list until the replica count is
        # satisfied, and drops each name once its pod is confirmed deleted.
        # scaleStrategy:
        #   workersToDelete:
        #   - raycluster-complete-worker-small-group-bdtwh
        #   - raycluster-complete-worker-small-group-hv457
        #   - raycluster-complete-worker-small-group-k8tj7
        # The following params complete the worker's start command:
        #   ray start --block --node-ip-address= ...
        rayStartParams:
          node-ip-address: $MY_POD_IP
          block: 'true'
          # Can be auto-completed from the limits.
          # NOTE(review): this advertises 4 CPUs to Ray while the container
          # limit below is 2 -- presumably deliberate; confirm.
          num-cpus: '4'
        # Pod template for the workers.
        template:
          metadata:
            labels:
              key: value
              rayCluster: cluster-{cluster_id}
            # Annotations for the pod.
            annotations:
              key: value
          spec:
            initContainers:
              # Blocks worker start-up until the head service name resolves.
              # The env var $RAY_IP is set by the operator if missing, with
              # the value of the head service name.
              - name: init-myservice
                image: busybox:1.28
                command:
                  - 'sh'
                  - '-c'
                  - >-
                    until nslookup $RAY_IP.$(cat /var/run/secrets/kubernetes.io/serviceaccount/namespace).svc.cluster.local;
                    do echo waiting for myservice; sleep 2; done
            volumes:
              - name: script
                configMap:
                  name: script-{cluster_id}
              # The doubled braces are an escaped empty mapping and must stay
              # doubled in this template.
              - name: log-volume
                emptyDir: {{}}
            containers:
              # The name must consist of lower case alphanumeric characters or
              # '-', and must start and end with an alphanumeric character
              # (e.g. 'my-name' or '123-abc').
              - name: machine-learning
                image: '{ray_image}'
                imagePullPolicy: Always
                # Only timings are given for the probes; no probe handler is
                # specified in this template.
                # NOTE(review): presumably the operator supplies the handler --
                # verify.
                livenessProbe:
                  initialDelaySeconds: 30
                  periodSeconds: 5
                  timeoutSeconds: 10
                readinessProbe:
                  initialDelaySeconds: 30
                  periodSeconds: 5
                  timeoutSeconds: 10
                # Environment variables to set in the container. Optional.
                # Refer to https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/
                env:
                  - name: RAY_DISABLE_DOCKER_CPU_WARNING
                    value: "1"
                  - name: TYPE
                    value: "worker"
                  - name: CPU_REQUEST
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: requests.cpu
                  - name: CPU_LIMITS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: limits.cpu
                  - name: MEMORY_LIMITS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: limits.memory
                  - name: MEMORY_REQUESTS
                    valueFrom:
                      resourceFieldRef:
                        containerName: machine-learning
                        resource: requests.memory
                  - name: MY_POD_NAME
                    valueFrom:
                      fieldRef:
                        fieldPath: metadata.name
                  - name: MY_POD_IP
                    valueFrom:
                      fieldRef:
                        fieldPath: status.podIP
                  - name: RAY_gcs_rpc_server_reconnect_timeout_s
                    value: "600"
                  - name: RAY_gcs_server_request_timeout_seconds
                    value: "5"
                  - name: SERVE_DEPLOYMENT_HANDLE_IS_SYNC
                    value: "1"
                ports:
                  - containerPort: 80
                    name: client
                # Stop Ray before the container is terminated.
                lifecycle:
                  preStop:
                    exec:
                      command: ["/bin/sh", "-c", "ray stop"]
                resources:
                  limits:
                    cpu: "2"
                  requests:
                    cpu: "2"
                volumeMounts:
                  # subPath mounts just the solution.py key as a single file.
                  - name: script
                    mountPath: /tmp/testing/solution.py
                    subPath: solution.py
                  - mountPath: /tmp/ray/
                    name: log-volume