# syntax=docker/dockerfile:1
  1. FROM hardandheavy/transformers-rocm:2.2.0
  2. # Define environments
  3. ENV MAX_JOBS=4
  4. ENV FLASH_ATTENTION_FORCE_BUILD=TRUE
  5. ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
  6. # Define installation arguments
  7. ARG INSTALL_BNB=false
  8. ARG INSTALL_VLLM=false
  9. ARG INSTALL_DEEPSPEED=false
  10. ARG INSTALL_FLASHATTN=false
  11. ARG INSTALL_LIGER_KERNEL=false
  12. ARG INSTALL_HQQ=false
  13. ARG PIP_INDEX=https://pypi.org/simple
  14. # Set the working directory
  15. WORKDIR /app
  16. # Install the requirements
  17. COPY requirements.txt /app
  18. RUN pip config set global.index-url "$PIP_INDEX" && \
  19. pip config set global.extra-index-url "$PIP_INDEX" && \
  20. python -m pip install --upgrade pip && \
  21. python -m pip install -r requirements.txt
  22. # Copy the rest of the application into the image
  23. COPY . /app
  24. # Install the LLaMA Factory
  25. RUN EXTRA_PACKAGES="metrics"; \
  26. if [ "$INSTALL_BNB" == "true" ]; then \
  27. EXTRA_PACKAGES="${EXTRA_PACKAGES},bitsandbytes"; \
  28. fi; \
  29. if [ "$INSTALL_VLLM" == "true" ]; then \
  30. EXTRA_PACKAGES="${EXTRA_PACKAGES},vllm"; \
  31. fi; \
  32. if [ "$INSTALL_DEEPSPEED" == "true" ]; then \
  33. EXTRA_PACKAGES="${EXTRA_PACKAGES},deepspeed"; \
  34. fi; \
  35. if [ "$INSTALL_LIGER_KERNEL" == "true" ]; then \
  36. EXTRA_PACKAGES="${EXTRA_PACKAGES},liger-kernel"; \
  37. fi; \
  38. if [ "$INSTALL_HQQ" == "true" ]; then \
  39. EXTRA_PACKAGES="${EXTRA_PACKAGES},hqq"; \
  40. fi; \
  41. pip install -e ".[$EXTRA_PACKAGES]"
  42. # Rebuild flash attention
  43. RUN pip uninstall -y transformer-engine flash-attn && \
  44. if [ "$INSTALL_FLASHATTN" == "true" ]; then \
  45. pip uninstall -y ninja && pip install ninja && \
  46. pip install --no-cache-dir flash-attn --no-build-isolation; \
  47. fi
  48. # Set up volumes
  49. VOLUME [ "/root/.cache/huggingface", "/root/.cache/modelscope", "/app/data", "/app/output" ]
  50. # Expose port 7860 for the LLaMA Board
  51. ENV GRADIO_SERVER_PORT 7860
  52. EXPOSE 7860
  53. # Expose port 8000 for the API service
  54. ENV API_PORT 8000
  55. EXPOSE 8000