remsky · josejuanmontiel · Feb 3, 2026 · Feb 3, 2026 · Feb 3, 2026 · Mar 26, 2026
diff --git a/api/src/core/config.py b/api/src/core/config.py
@@ -35,7 +35,7 @@ class Settings(BaseSettings):
     )
     use_gpu: bool = True  # Whether to use GPU acceleration if available
     device_type: str | None = (
-        None  # Will be auto-detected if None, can be "cuda", "mps", or "cpu"
+        None  # Will be auto-detected if None, can be "cuda", "mps", "xpu", or "cpu"
     )
     allow_local_voice_saving: bool = (
         False  # Whether to allow saving combined voices locally
@@ -75,7 +75,7 @@ class Settings(BaseSettings):
     cors_enabled: bool = True  # Whether to enable CORS
 
     # Temp File Settings for WEB Ui
-    temp_file_dir: str = "api/temp_files"  # Directory for temporary audio files (relative to project root)
+    temp_file_dir: str = "/tmp/kokoro_temp"  # Directory for temporary audio files
     max_temp_dir_size_mb: int = 2048  # Maximum size of temp directory (2GB)
     max_temp_dir_age_hours: int = 1  # Remove temp files older than 1 hour
     max_temp_dir_count: int = 3  # Maximum number of temp files to keep
@@ -96,6 +96,14 @@ def get_device(self) -> str:
             return "mps"
         elif torch.cuda.is_available():
             return "cuda"
+
+        # Check for Intel GPU (XPU)
+        try:
+            if hasattr(torch, "xpu") and torch.xpu.is_available():
+                return "xpu"
+        except Exception:
+            pass
+
         return "cpu"
 
 

diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
@@ -75,6 +75,8 @@ async def load_model(self, path: str) -> None:
                 self._model = self._model.to(torch.device("mps"))
             elif self._device == "cuda":
                 self._model = self._model.cuda()
+            elif self._device == "xpu":
+                self._model = self._model.to("xpu")
             else:
                 self._model = self._model.cpu()
 
@@ -350,6 +352,12 @@ def _check_memory(self) -> bool:
         if self._device == "cuda":
             memory_gb = torch.cuda.memory_allocated() / 1e9
             return memory_gb > model_config.pytorch_gpu.memory_threshold
+        elif self._device == "xpu":
+            try:
+                memory_gb = torch.xpu.memory_allocated() / 1e9
+                return memory_gb > model_config.pytorch_gpu.memory_threshold
+            except (RuntimeError, AttributeError):
+                pass
         # MPS doesn't provide memory management APIs
         return False
 
@@ -362,6 +370,12 @@ def _clear_memory(self) -> None:
             # Empty cache if available (future-proofing)
             if hasattr(torch.mps, "empty_cache"):
                 torch.mps.empty_cache()
+        elif self._device == "xpu":
+            try:
+                torch.xpu.empty_cache()
+                torch.xpu.synchronize()
+            except (RuntimeError, AttributeError):
+                pass
 
     def unload(self) -> None:
         """Unload model and free resources."""
@@ -375,6 +389,12 @@ def unload(self) -> None:
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
             torch.cuda.synchronize()
+        try:
+            if hasattr(torch, "xpu") and torch.xpu.is_available():
+                torch.xpu.empty_cache()
+                torch.xpu.synchronize()
+        except Exception:
+            pass
 
     @property
     def is_loaded(self) -> bool:

diff --git a/api/src/inference/model_manager.py b/api/src/inference/model_manager.py
@@ -32,7 +32,7 @@ def __init__(self, config: Optional[ModelConfig] = None):
 
     def _determine_device(self) -> str:
         """Determine device based on settings."""
-        return "cuda" if settings.use_gpu else "cpu"
+        return settings.get_device()
 
     async def initialize(self) -> None:
         """Initialize Kokoro V1 backend."""

diff --git a/api/src/main.py b/api/src/main.py
@@ -94,6 +94,8 @@ async def lifespan(app: FastAPI):
         startup_msg += "\nUsing Apple Metal Performance Shaders (MPS)"
     elif device == "cuda":
         startup_msg += f"\nCUDA: {torch.cuda.is_available()}"
+    elif device == "xpu":
+        startup_msg += f"\nXPU: {torch.xpu.is_available()}"
     else:
         startup_msg += "\nRunning on CPU"
     startup_msg += f"\n{voicepack_count} voice packs loaded"

diff --git a/docker-bake.hcl b/docker-bake.hcl
@@ -115,6 +115,21 @@ target "_rocm_base" {
     ]
 }
 
+# Base settings for Intel builds
+target "_intel_base" {
+    inherits = ["_common"]
+    dockerfile = "docker/intel/Dockerfile"
+    labels = {
+        "org.opencontainers.image.title"       = "Kokoro-FastAPI (Intel)"
+        "org.opencontainers.image.description" = "Kokoro TTS served via FastAPI. Intel GPU build (amd64 only)."
+    }
+    annotations = [
+        "org.opencontainers.image.title=Kokoro-FastAPI (Intel)",
+        "org.opencontainers.image.description=Kokoro TTS served via FastAPI. Intel GPU build (amd64 only).",
+    ]
+}
+
+
 
 # Individual platform targets for debugging/testing
 target "cpu-amd64" {
@@ -183,6 +198,15 @@ target "rocm-amd64" {
     ]
 }
 
+# Intel only supports x86
+target "intel-amd64" {
+    inherits = ["_intel_base"]
+    platforms = ["linux/amd64"]
+    tags = [
+        "${REGISTRY}/${OWNER}/${REPO}-intel:${VERSION}-amd64"
+    ]
+}
+
 # Development targets for faster local builds
 target "cpu-dev" {
     inherits = ["_cpu_base"]
@@ -223,10 +247,14 @@ group "rocm-all" {
     targets = ["rocm-amd64"]
 }
 
+group "intel-all" {
+    targets = ["intel-amd64"]
+}
+
 group "all" {
-    targets = ["cpu", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64"]
+    targets = ["cpu", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64", "intel-amd64"]
 }
 
 group "individual-platforms" {
-    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64"]
+    targets = ["cpu-amd64", "cpu-arm64", "gpu-amd64", "gpu-arm64", "gpu-cu128-amd64", "rocm-amd64", "intel-amd64"]
 }
diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile
@@ -0,0 +1,104 @@
+# Stage 1: Builder - install Python dependencies into a uv-managed virtualenv
+FROM ubuntu:24.04 AS builder
+
+# Install Python and build dependencies
+RUN apt-get update -y && \
+    apt-get install -y python3.10 python3-venv python3-dev git curl wget gnupg2 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    mv /root/.local/bin/uv /usr/local/bin/ && \
+    mv /root/.local/bin/uvx /usr/local/bin/
+
+WORKDIR /app
+
+# Copy dependency files
+COPY pyproject.toml ./pyproject.toml
+
+# Install dependencies with intel extras
+ENV UV_HTTP_TIMEOUT=120 UV_HTTP_RETRIES=3 \
+    UV_PYTHON_INSTALL_DIR=/opt/uv-python
+
+RUN uv venv --python 3.10 && \
+    uv sync --extra intel --no-cache --no-install-project
+
+# Stage 2: Runtime
+FROM ubuntu:24.04
+
+# Install runtime dependencies + uv
+RUN apt-get update -y && \
+    apt-get install -y python3.10 espeak-ng espeak-ng-data libsndfile1 ffmpeg curl wget gnupg2 libnuma1 ocl-icd-libopencl1 libze1 intel-media-va-driver-non-free libmfx1 && \
+    curl -LsSf https://astral.sh/uv/install.sh | sh && \
+    mv /root/.local/bin/uv /usr/local/bin/ && \
+    mv /root/.local/bin/uvx /usr/local/bin/ && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Add Intel oneAPI repository for MKL and other tools
+RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
+    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
+    apt-get update && \
+    apt-get install -y intel-oneapi-mkl intel-oneapi-runtime-mkl intel-oneapi-runtime-compilers intel-oneapi-compiler-dpcpp-cpp-runtime-2025.3 && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Intel GPU drivers (Compute Runtime & IGC) using the reference script
+COPY docker/intel/install_ubuntu_gpu_drivers.sh /tmp/install_gpu_drivers.sh
+ARG INSTALL_DRIVER_VERSION="25.35.35096"
+RUN chmod +x /tmp/install_gpu_drivers.sh && \
+    /tmp/install_gpu_drivers.sh && \
+    rm -f /tmp/install_gpu_drivers.sh
+
+# Setup user and GID
+ARG RENDER_GID=992
+RUN mkdir -p /usr/share/espeak-ng-data && \
+    ln -s /usr/lib/*/espeak-ng-data/* /usr/share/espeak-ng-data/ && \
+    (groupmod -g ${RENDER_GID} render || groupadd -g ${RENDER_GID} render) && \
+    useradd -m -u 1001 -G video,render appuser && \
+    mkdir -p /app/api/src/models/v1_0 && \
+    chown -R appuser:appuser /app
+
+WORKDIR /app
+
+# Copy uv-managed Python interpreter (the venv's bin/python symlinks into here)
+COPY --from=builder /opt/uv-python /opt/uv-python
+
+# Copy virtual environment from builder
+COPY --from=builder --chown=appuser:appuser /app/.venv /app/.venv
+
+# Download model in runtime stage so download_model.py is present for runtime re-downloads via entrypoint
+COPY --chown=appuser:appuser docker/scripts/download_model.py ./download_model.py
+ARG DOWNLOAD_MODEL=true
+RUN if [ "$DOWNLOAD_MODEL" = "true" ]; then \
+    /app/.venv/bin/python download_model.py --output api/src/models/v1_0 && \
+    chown -R appuser:appuser /app/api/src/models; \
+    fi
+
+# Japanese support requires the UniDic dictionary
+ARG INCLUDE_JAPANESE=true
+RUN if [ "$INCLUDE_JAPANESE" = "true" ]; then \
+    /app/.venv/bin/python -m unidic download && \
+    chown -R appuser:appuser /app/.venv/lib/python*/site-packages/unidic; \
+    fi
+
+# Copy project files
+COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml
+COPY --chown=appuser:appuser api ./api
+COPY --chown=appuser:appuser web ./web
+COPY --chown=appuser:appuser VERSION ./VERSION
+COPY --chown=appuser:appuser docker/intel/entrypoint.sh ./entrypoint.sh
+RUN chmod +x ./entrypoint.sh
+
+USER appuser
+
+# Set environment variables
+ENV PATH="/app/.venv/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONPATH=/app:/app/api \
+    UV_LINK_MODE=copy \
+    USE_GPU=true \
+    PHONEMIZER_ESPEAK_PATH=/usr/bin \
+    PHONEMIZER_ESPEAK_DATA=/usr/share/espeak-ng-data \
+    ESPEAK_DATA_PATH=/usr/share/espeak-ng-data \
+    DEVICE="intel" \
+    LD_LIBRARY_PATH="/opt/intel/oneapi/mkl/latest/lib:/opt/intel/oneapi/mkl/latest/lib/intel64:/opt/intel/oneapi/compiler/latest/lib/intel64:$LD_LIBRARY_PATH"
+
+# Run FastAPI server
+CMD ["./entrypoint.sh"]
diff --git a/docker/intel/README.md b/docker/intel/README.md
@@ -0,0 +1,43 @@
+# Kokoro-FastAPI: Intel GPU (XPU) Setup
+
+This directory contains the configuration to run Kokoro-FastAPI leveraging Intel GPUs (Arc, Data Center, and Integrated Graphics) through the Intel Extension for PyTorch (IPEX).
+
+## Features
+- **Intel XPU Acceleration**: Uses IPEX 2.5.10 + PyTorch 2.5.1 for optimized inference.
+- **Automated Driver Stack**: Integrated with Intel's reference driver installation script.
+- **Secure Non-Root User**: Runs as a standard `appuser` with correct GPU group permissions.
+
+## Requirements
+- **Host Drivers**: Ensure your host has the Intel GPU drivers installed (Compute Runtime, Level Zero).
+- **Docker**: Version 20.10+ with `device_cgroup_rules` support.
+
+## Usage
+
+### 1. Build and Start
+Run the following command from the project root:
+```bash
+docker compose -f docker/intel/docker-compose.yml up --build
+```
+
+### 2. Verify GPU Access
+Check the container logs for XPU registration messages or run:
+```bash
+docker compose -f docker/intel/docker-compose.yml exec kokoro-tts python -c "import torch; import intel_extension_for_pytorch; print(f'XPU Available: {torch.xpu.is_available()}')"
+```
+
+## Technical Details
+- **Base Image**: Ubuntu 24.04 (required for glibc compatibility).
+- **Versions**:
+  - Python: 3.10
+  - Torch: 2.5.1
+  - IPEX: 2.5.10+xpu
+- **Groups**: The container uses a default `RENDER_GID=992`. If your host's `render` group has a different ID, pass it as a build argument:
+  ```bash
+  docker compose -f docker/intel/docker-compose.yml build --build-arg RENDER_GID=$(stat -c '%g' /dev/dri/renderD128)
+  ```
+
+## Troubleshooting
+If you see `RuntimeError: Native API failed`, check:
+1. That `/dev/dri` is correctly mapped.
+2. That your host user has permissions to access `/dev/dri` (usually by being in the `video` or `render` groups).
+3. That the `RENDER_GID` build argument matches your host's GID.
diff --git a/docker/intel/docker-compose.yml b/docker/intel/docker-compose.yml
@@ -0,0 +1,28 @@
+name: kokoro-tts-intel
+services:
+  kokoro-tts:
+    image: kokoro-tts-intel
+    build:
+      context: ../..
+      dockerfile: docker/intel/Dockerfile
+      args:
+        RENDER_GID: ${RENDER_GID:-992}
+    volumes:
+      - ../../api:/app/api
+      - models_data:/app/api/src/models/v1_0
+    user: appuser
+    ports:
+      - "8880:8880"
+    devices:
+      - /dev/dri:/dev/dri # Pass Intel GPU device
+    environment:
+      - PYTHONPATH=/app:/app/api
+      - USE_GPU=true
+      - PYTHONUNBUFFERED=1
+      - API_LOG_LEVEL=DEBUG
+      - DEVICE_TYPE=xpu
+    device_cgroup_rules:
+      - 'c 226:* rmw'
+
+volumes:
+  models_data:
diff --git a/docker/intel/entrypoint.sh b/docker/intel/entrypoint.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+set -e
+
+# Permissions are expected to be set up by the Dockerfile/Compose file;
+# this script assumes it is already running as 'appuser'.
+
+if [ "$DOWNLOAD_MODEL" = "true" ]; then
+    echo "Downloading model..."
+    python download_model.py --output api/src/models/v1_0
+fi
+
+echo "Starting Application..."
+# 'exec' replaces this shell so the server receives signals directly; DEVICE is quoted and required so --extra always gets exactly one value
+exec uv run --extra "${DEVICE:?DEVICE must name a pyproject extra, e.g. intel}" --no-sync python -m uvicorn api.src.main:app --host 0.0.0.0 --port 8880 --log-level debug