Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions api/src/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Settings(BaseSettings):
)
use_gpu: bool = True # Whether to use GPU acceleration if available
device_type: str | None = (
None # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
None # Will be auto-detected if None, can be "cuda", "mps", "xpu", or "cpu"
)
allow_local_voice_saving: bool = (
False # Whether to allow saving combined voices locally
Expand Down Expand Up @@ -75,7 +75,7 @@ class Settings(BaseSettings):
cors_enabled: bool = True # Whether to enable CORS

# Temp File Settings for WEB Ui
temp_file_dir: str = "api/temp_files" # Directory for temporary audio files (relative to project root)
temp_file_dir: str = "/tmp/kokoro_temp" # Directory for temporary audio files
max_temp_dir_size_mb: int = 2048 # Maximum size of temp directory (2GB)
max_temp_dir_age_hours: int = 1 # Remove temp files older than 1 hour
max_temp_dir_count: int = 3 # Maximum number of temp files to keep
Expand All @@ -96,6 +96,14 @@ def get_device(self) -> str:
return "mps"
elif torch.cuda.is_available():
return "cuda"

# Check for Intel GPU (XPU)
try:
if hasattr(torch, "xpu") and torch.xpu.is_available():
return "xpu"
except Exception:
pass

return "cpu"


Expand Down
20 changes: 20 additions & 0 deletions api/src/inference/kokoro_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ async def load_model(self, path: str) -> None:
self._model = self._model.to(torch.device("mps"))
elif self._device == "cuda":
self._model = self._model.cuda()
elif self._device == "xpu":
self._model = self._model.to("xpu")
else:
self._model = self._model.cpu()

Expand Down Expand Up @@ -350,6 +352,12 @@ def _check_memory(self) -> bool:
if self._device == "cuda":
memory_gb = torch.cuda.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
elif self._device == "xpu":
try:
memory_gb = torch.xpu.memory_allocated() / 1e9
return memory_gb > model_config.pytorch_gpu.memory_threshold
except (RuntimeError, AttributeError):
pass
# MPS doesn't provide memory management APIs
return False

Expand All @@ -362,6 +370,12 @@ def _clear_memory(self) -> None:
# Empty cache if available (future-proofing)
if hasattr(torch.mps, "empty_cache"):
torch.mps.empty_cache()
elif self._device == "xpu":
try:
torch.xpu.empty_cache()
torch.xpu.synchronize()
except (RuntimeError, AttributeError):
pass

def unload(self) -> None:
"""Unload model and free resources."""
Expand All @@ -375,6 +389,12 @@ def unload(self) -> None:
if torch.cuda.is_available():
torch.cuda.empty_cache()
torch.cuda.synchronize()
try:
if hasattr(torch, "xpu") and torch.xpu.is_available():
torch.xpu.empty_cache()
torch.xpu.synchronize()
except Exception:
pass

@property
def is_loaded(self) -> bool:
Expand Down
2 changes: 1 addition & 1 deletion api/src/inference/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def __init__(self, config: Optional[ModelConfig] = None):

def _determine_device(self) -> str:
"""Determine device based on settings."""
return "cuda" if settings.use_gpu else "cpu"
return settings.get_device()

async def initialize(self) -> None:
"""Initialize Kokoro V1 backend."""
Expand Down
2 changes: 2 additions & 0 deletions api/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ async def lifespan(app: FastAPI):
startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
elif device == "cuda":
startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
elif device == "xpu":
startup_msg += f"\nXPU: {torch.xpu.is_available()}"
else:
startup_msg += "\nRunning on CPU"
startup_msg += f"\n{voicepack_count} voice packs loaded"
Expand Down
32 changes: 30 additions & 2 deletions docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,21 @@ target "_rocm_base" {
]
}

# Base settings for Intel builds
# Abstract target (leading underscore, not listed in any group): it inherits
# "_common", points the build at the Intel Dockerfile and sets the OCI
# title/description metadata.
target "_intel_base" {
  inherits = ["_common"]
  dockerfile = "docker/intel/Dockerfile"
  # The same title/description pair is set twice: once as image labels and
  # once as annotations.
  labels = {
    "org.opencontainers.image.title" = "Kokoro-FastAPI (Intel)"
    "org.opencontainers.image.description" = "Kokoro TTS served via FastAPI. Intel GPU build (amd64 only)."
  }
  annotations = [
    "org.opencontainers.image.title=Kokoro-FastAPI (Intel)",
    "org.opencontainers.image.description=Kokoro TTS served via FastAPI. Intel GPU build (amd64 only).",
  ]
}



# Individual platform targets for debugging/testing
target "cpu-amd64" {
Expand Down Expand Up @@ -183,6 +198,15 @@ target "rocm-amd64" {
]
}

# Intel only supports x86
# Concrete Intel target: builds "_intel_base" for linux/amd64 and tags the
# result as <registry>/<owner>/<repo>-intel:<version>-amd64.
target "intel-amd64" {
  inherits = ["_intel_base"]
  platforms = ["linux/amd64"]
  tags = [
    "${REGISTRY}/${OWNER}/${REPO}-intel:${VERSION}-amd64"
  ]
}

# Development targets for faster local builds
target "cpu-dev" {
inherits = ["_cpu_base"]
Expand Down Expand Up @@ -223,10 +247,14 @@ group "rocm-all" {
targets = ["rocm-amd64"]
}

# Convenience group for the Intel images; it currently contains the single
# amd64 target.
group "intel-all" {
  targets = ["intel-amd64"]
}

group "all" {
targets = ["cpu", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64"]
targets = ["cpu", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64", "intel-amd64"]
}

group "individual-platforms" {
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64"]
targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64", "intel-amd64"]
}
104 changes: 104 additions & 0 deletions docker/intel/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# Stage 1: Builder - installs the Python dependencies into /app/.venv.
# The runtime stage copies /app/.venv and /opt/uv-python out of this stage.
FROM ubuntu:24.04 AS builder

# Install Python and build dependencies, then install uv with its installer
# script and move the uv/uvx binaries to /usr/local/bin.
# NOTE(review): the venv below is created with `uv venv --python 3.10`;
# confirm that the apt package name `python3.10` resolves as intended on this
# base image.
RUN apt-get update -y && \
    apt-get install -y python3.10 python3-venv python3-dev git curl wget gnupg2 && \
    apt-get clean && rm -rf /var/lib/apt/lists/* && \
    curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/ && \
    mv /root/.local/bin/uvx /usr/local/bin/

WORKDIR /app

# Copy dependency files
COPY pyproject.toml ./pyproject.toml

# Install dependencies with intel extras
# UV_PYTHON_INSTALL_DIR fixes the directory for uv-managed interpreters so the
# runtime stage can copy it from a known path.
ENV UV_HTTP_TIMEOUT=120 UV_HTTP_RETRIES=3 \
    UV_PYTHON_INSTALL_DIR=/opt/uv-python

# --no-install-project: only the dependencies are installed here; the project
# sources (api/, web/) are copied in the runtime stage.
RUN uv venv --python 3.10 && \
    uv sync --extra intel --no-cache --no-install-project

# Stage 2: Runtime
FROM ubuntu:24.04

# Install runtime dependencies + uv
# uv is installed again here because the entrypoint launches the server
# through `uv run`.
RUN apt-get update -y && \
    apt-get install -y python3.10 espeak-ng espeak-ng-data libsndfile1 ffmpeg curl wget gnupg2 libnuma1 ocl-icd-libopencl1 libze1 intel-media-va-driver-non-free libmfx1 && \
    curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/ && \
    mv /root/.local/bin/uvx /usr/local/bin/ && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Add Intel oneAPI repository for MKL and other tools
# The repository key is dearmored into a dedicated keyring that the apt source
# entry references through `signed-by`.
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
    apt-get update && \
    apt-get install -y intel-oneapi-mkl intel-oneapi-runtime-mkl intel-oneapi-runtime-compilers intel-oneapi-compiler-dpcpp-cpp-runtime-2025.3 && \
    apt-get clean && rm -rf /var/lib/apt/lists/*

# Install Intel GPU drivers (Compute Runtime & IGC) using the reference script
COPY docker/intel/install_ubuntu_gpu_drivers.sh /tmp/install_gpu_drivers.sh
# Build args are exposed as environment variables to the RUN below.
# NOTE(review): presumably the install script reads INSTALL_DRIVER_VERSION;
# the script is not visible here, so verify.
ARG INSTALL_DRIVER_VERSION="25.35.35096"
RUN chmod +x /tmp/install_gpu_drivers.sh && \
    /tmp/install_gpu_drivers.sh && \
    rm -f /tmp/install_gpu_drivers.sh

# Setup user and GID
# RENDER_GID is the GID given to the container's `render` group; the README
# asks for it to match the host's render group.
ARG RENDER_GID=992
# - link the distro's espeak-ng data into /usr/share/espeak-ng-data
# - re-number an existing `render` group, or create it if groupmod fails
# - create appuser (uid 1001) as a member of `video` and `render`
# - pre-create the model directory and hand /app to appuser
RUN mkdir -p /usr/share/espeak-ng-data && \
    ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
    (groupmod -g ${RENDER_GID} render || groupadd -g ${RENDER_GID} render) && \
    useradd -m -u 1001 -G video,render appuser && \
    mkdir -p /app/api/src/models/v1_0 && \
    chown -R appuser:appuser /app

WORKDIR /app

# Copy uv-managed Python interpreter (the venv's bin/python symlinks into here)
COPY --from=builder /opt/uv-python /opt/uv-python

# Copy virtual environment from builder
COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv

# Download model in runtime stage so download_model.py is present for runtime re-downloads via entrypoint
COPY --chown=appuser:appuser docker/scripts/download_model.py ./download_model.py
# DOWNLOAD_MODEL is a build ARG only; it is not exported with ENV, so the
# entrypoint's runtime check sees it only when it is set at container start.
ARG DOWNLOAD_MODEL=true
RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
        /app/.venv/bin/python download_model.py --output api/src/models/v1_0 && \
        chown -R appuser:appuser /app/api/src/models; \
    fi

# Japanese support requires the UniDic dictionary
ARG INCLUDE_JAPANESE=true
RUN if [ "$INCLUDE_JAPANESE" = "true" ]; then \
        /app/.venv/bin/python -m unidic download && \
        chown -R appuser:appuser /app/.venv/lib/python*/site-packages/unidic; \
    fi

# Copy project files
COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
COPY --chown=appuser:appuser api ./api
COPY --chown=appuser:appuser web ./web
COPY --chown=appuser:appuser VERSION ./VERSION
COPY --chown=appuser:appuser docker/intel/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

# Everything from here on, including the entrypoint, runs unprivileged.
USER appuser

# Set environment variables
# - PATH puts the venv first so `python` resolves to /app/.venv/bin/python.
# - DEVICE is the uv extra name that entrypoint.sh passes to `uv run --extra`.
# - LD_LIBRARY_PATH lists the oneAPI MKL/compiler library directories. It is
#   written out in full instead of appending $LD_LIBRARY_PATH: no instruction
#   in this stage defines that variable, so the expansion would be empty and
#   leave a trailing ':' — an empty entry that the dynamic loader interprets
#   as the current working directory.
ENV PATH="/app/.venv/bin:$PATH" \
    PYTHONUNBUFFERED=1 \
    PYTHONPATH=/app:/app/api \
    UV_LINK_MODE=copy \
    USE_GPU=true \
    PHONEMIZER_ESPEAK_PATH=/usr/bin \
    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
    DEVICE="intel" \
    LD_LIBRARY_PATH="/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/mkl/latest/lib/intel64:/opt/intel/oneapi/compiler/latest/lib/intel64"

# Run FastAPI server
CMD ["./entrypoint.sh"]
43 changes: 43 additions & 0 deletions docker/intel/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Kokoro-FastAPI: Intel GPU (XPU) Setup

This directory contains the configuration to run Kokoro-FastAPI leveraging Intel GPUs (Arc, Data Center, and Integrated Graphics) through the Intel Extension for PyTorch (IPEX).

## Features
- **Intel XPU Acceleration**: Uses IPEX 2.5.10 + PyTorch 2.5.1 for optimized inference.
- **Automated Driver Stack**: Integrated with Intel's reference driver installation script.
- **Secure Non-Root User**: Runs as a standard `appuser` with correct GPU group permissions.

## Requirements
- **Host Drivers**: Ensure your host has the Intel GPU drivers installed (Compute Runtime, Level Zero).
- **Docker**: Version 20.10+ with `device_cgroup_rules` support.

## Usage

### 1. Build and Start
Run the following command from the project root:
```bash
docker compose -f docker/intel/docker-compose.yml up --build
```

### 2. Verify GPU Access
Check the container logs for XPU registration messages or run:
```bash
docker compose -f docker/intel/docker-compose.yml exec kokoro-tts python -c "import torch; import intel_extension_for_pytorch; print(f'XPU Available: {torch.xpu.is_available()}')"
```

## Technical Details
- **Base Image**: Ubuntu 24.04 (required for glibc compatibility).
- **Versions**:
  - Python: 3.10 (the Dockerfile creates the environment with `uv venv --python 3.10`)
  - Torch: 2.5.1
  - IPEX: 2.5.10+xpu
- **Groups**: The container uses a default `RENDER_GID=992`. If your host's `render` group has a different ID, pass it as a build argument:
```bash
docker compose -f docker/intel/docker-compose.yml build --build-arg RENDER_GID=$(stat -c '%g' /dev/dri/renderD128)
```

## Troubleshooting
If you see `RuntimeError: Native API failed`, check:
1. That `/dev/dri` is correctly mapped.
2. That your host user has permissions to access `/dev/dri` (usually by being in the `video` or `render` groups).
3. That the `RENDER_GID` build argument matches your host's GID.
28 changes: 28 additions & 0 deletions docker/intel/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Compose project for the Intel GPU (XPU) build of Kokoro-FastAPI.
name: kokoro-tts-intel
services:
  kokoro-tts:
    image: kokoro-tts-intel
    build:
      # Build from the repository root so the Dockerfile's COPY paths
      # (api/, web/, docker/...) resolve.
      context: ../..
      dockerfile: docker/intel/Dockerfile
      args:
        # Falls back to 992 when RENDER_GID is not set in the environment.
        RENDER_GID: ${RENDER_GID:-992}
    volumes:
      # Bind-mounts the local api/ tree over the copy baked into the image.
      - ../../api:/app/api
      # Named volume mounted on the model directory.
      - models_data:/app/api/src/models/v1_0
    user: appuser
    ports:
      - "8880:8880"
    devices:
      - /dev/dri:/dev/dri # Pass Intel GPU device
    environment:
      - PYTHONPATH=/app:/app/api
      - USE_GPU=true
      - PYTHONUNBUFFERED=1
      - API_LOG_LEVEL=DEBUG
      # NOTE(review): presumably read into Settings.device_type; confirm the
      # env-var mapping in api/src/core/config.py.
      - DEVICE_TYPE=xpu
    device_cgroup_rules:
      # Allow read/mknod/write on character devices with major number 226
      # (expected to be the /dev/dri nodes; confirm on the host).
      - 'c 226:* rmw'

volumes:
  models_data:
14 changes: 14 additions & 0 deletions docker/intel/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# Container entrypoint for the Intel image: optionally (re)downloads the model,
# then replaces this shell with the uvicorn server.
#
# Environment:
#   DOWNLOAD_MODEL  when exactly "true", run download_model.py before starting
#   DEVICE          uv extra to activate (the Dockerfile sets "intel")
set -e

# In this simplified version, we assume permissions are handled via Dockerfile/Compose
# and the user is 'appuser'.

if [ "$DOWNLOAD_MODEL" = "true" ]; then
    echo "Downloading model..."
    python download_model.py --output api/src/models/v1_0
fi

echo "Starting Application..."
# We use 'exec' so the process handles signals
# DEVICE is quoted and defaulted: if it were empty and unquoted, `--extra`
# would take the next word (`--no-sync`) as its argument.
exec uv run --extra "${DEVICE:-intel}" --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug
Loading