# Dockerfile

# Use the NVIDIA official image with PyTorch 2.3.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-24-02.html
FROM nvcr.io/nvidia/pytorch:24.02-py3

# Define environment variables
# MAX_JOBS caps parallel compile jobs when flash-attn is built from source,
# which FLASH_ATTENTION_FORCE_BUILD=TRUE forces; spawn avoids CUDA
# re-initialization issues in vLLM's forked workers
ENV MAX_JOBS=4
ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn

# Define installation arguments
ARG INSTALL_BNB=false
ARG INSTALL_VLLM=false
ARG INSTALL_DEEPSPEED=false
ARG INSTALL_FLASHATTN=false
ARG INSTALL_LIGER_KERNEL=false
ARG INSTALL_HQQ=false
ARG INSTALL_EETQ=false
ARG PIP_INDEX=https://pypi.org/simple
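
# Optional extras are opted into at build time, e.g. (the image tag here
# is illustrative, not prescribed by this repo):
#   docker build --build-arg INSTALL_VLLM=true --build-arg INSTALL_FLASHATTN=true \
#     -t llamafactory:latest .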

# Set the working directory
WORKDIR /app

# Install the requirements
COPY requirements.txt /app
RUN pip config set global.index-url "$PIP_INDEX" && \
    pip config set global.extra-index-url "$PIP_INDEX" && \
    python -m pip install --upgrade pip && \
    python -m pip install -r requirements.txt

# Copy the rest of the application into the image
COPY . /app

# Install LLaMA Factory with the extras selected above
# (POSIX sh: use `=` rather than `==` inside `[ ... ]`)
RUN EXTRA_PACKAGES="metrics"; \
    if [ "$INSTALL_BNB" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
    fi; \
    if [ "$INSTALL_VLLM" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
    fi; \
    if [ "$INSTALL_DEEPSPEED" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
    fi; \
    if [ "$INSTALL_LIGER_KERNEL" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
    fi; \
    if [ "$INSTALL_HQQ" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
    fi; \
    if [ "$INSTALL_EETQ" = "true" ]; then \
        EXTRA_PACKAGES="${EXTRA_PACKAGES},eetq"; \
    fi; \
    pip install -e ".[$EXTRA_PACKAGES]"
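
# Outside this image, the same extras syntax works for a local install, e.g.:
#   pip install -e ".[metrics,deepspeed]"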

# Rebuild flash attention
RUN pip uninstall -y transformer-engine flash-attn && \
    if [ "$INSTALL_FLASHATTN" = "true" ]; then \
        pip uninstall -y ninja && pip install ninja && \
        pip install --no-cache-dir flash-attn --no-build-isolation; \
    fi

# Set up volumes
VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]

# Expose port 7860 for the LLaMA Board
ENV GRADIO_SERVER_PORT=7860
EXPOSE 7860

# Expose port 8000 for the API service
ENV API_PORT=8000
EXPOSE 8000
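
# A minimal run sketch that wires up the volumes and ports above
# (image tag and host paths are assumptions; --gpus requires the
# NVIDIA Container Toolkit):
#   docker run --gpus all -it --rm \
#     -v ~/.cache/huggingface:/root/.cache/huggingface \
#     -v "$PWD/data":/app/data -v "$PWD/output":/app/output \
#     -p 7860:7860 -p 8000:8000 \
#     llamafactory:latest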