From a8abef9499d54c2ba026c46446fa33268e79fa48 Mon Sep 17 00:00:00 2001 From: Matt Kornfield Date: Fri, 12 Jun 2026 21:08:45 +0000 Subject: [PATCH 1/3] chore: add hf retries to files --- .../core/files/app/backends/huggingface.py | 232 ++++++++++++------ .../files/tests/test_huggingface_backend.py | 54 +++- 2 files changed, 204 insertions(+), 82 deletions(-) diff --git a/services/core/files/src/nmp/core/files/app/backends/huggingface.py b/services/core/files/src/nmp/core/files/app/backends/huggingface.py index 2bf7356a5e..6006f9364d 100644 --- a/services/core/files/src/nmp/core/files/app/backends/huggingface.py +++ b/services/core/files/src/nmp/core/files/app/backends/huggingface.py @@ -6,13 +6,14 @@ from __future__ import annotations import logging +from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field -from typing import ( - AsyncIterator, -) +from datetime import datetime, timezone +from email.utils import parsedate_to_datetime +from typing import TypeVar import aiohttp -from anyio import to_thread +from anyio import sleep, to_thread from huggingface_hub import HfApi, get_hf_file_metadata, hf_hub_url from huggingface_hub.utils import ( EntryNotFoundError, @@ -39,6 +40,12 @@ logger = logging.getLogger(__name__) +_T = TypeVar("_T") + +_HF_TRANSIENT_RETRY_ATTEMPTS = 4 +_HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS = 0.5 +_HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS = 5.0 + class HuggingfaceBackendError(StorageBackendError): """Raised when there's issues talking to Huggingface.""" @@ -101,6 +108,90 @@ def raise_for_hf_status( raise HuggingfaceBackendError(f"HTTP {status_code}{context}") +def _map_hf_http_error(exc: HfHubHTTPError) -> Exception: + if exc.response is not None: + try: + raise_for_hf_status( + exc.response.status_code, + dict(exc.response.headers), + str(exc.response.url), + ) + except ( + HuggingfaceAccessError, + HuggingfaceConfigError, + HuggingfaceUnavailableError, + HuggingfaceBackendError, + ) as mapped: + return mapped + return HuggingfaceBackendError(f"HuggingFace API error: {exc}") + + +def _retry_after_seconds(headers: dict[str, str] | None) -> float | None: + if not headers: + return None + + raw_value = headers.get("Retry-After") or headers.get("retry-after") + if not raw_value: + return None + + try: + return max(0.0, float(raw_value)) + except ValueError: + pass + + try: + retry_at = parsedate_to_datetime(raw_value) + except (TypeError, ValueError): + return None + if retry_at.tzinfo is None: + retry_at = retry_at.replace(tzinfo=timezone.utc) + return max(0.0, (retry_at - datetime.now(timezone.utc)).total_seconds()) + + +async def _sleep_before_retry( + *, + operation: str, + attempt: int, + headers: dict[str, str] | None, + error: Exception, +) -> None: + retry_after = _retry_after_seconds(headers) + delay = ( + min(retry_after, _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS) + if retry_after is not None + else min( + _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS * (2 ** (attempt - 1)), + _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS, + ) + ) + logger.warning( + "Transient HuggingFace error during %s; retrying attempt %s/%s after %.2fs: %s", + operation, + attempt + 1, + _HF_TRANSIENT_RETRY_ATTEMPTS, + delay, + error, + ) + await sleep(delay) + + +async def _run_hf_request(operation: str, request: Callable[[], _T]) -> _T: + for attempt in range(1, _HF_TRANSIENT_RETRY_ATTEMPTS + 1): + try: + return await to_thread.run_sync(request) + except EntryNotFoundError: + raise + except HfHubHTTPError as exc: + mapped = _map_hf_http_error(exc) + if isinstance(mapped, HuggingfaceUnavailableError) and attempt < _HF_TRANSIENT_RETRY_ATTEMPTS: + headers = dict(exc.response.headers) if exc.response is not None else None + await _sleep_before_retry(operation=operation, attempt=attempt, headers=headers, error=mapped) + continue + raise mapped from exc + + raise HuggingfaceBackendError(f"HuggingFace API error during {operation}") + + @dataclass class HuggingfaceStorageImpl(StorageImpl): config: HuggingfaceStorageConfig @@ -126,28 +217,20 @@ async def resolve_config(self) -> HuggingfaceStorageConfig: Raises: HuggingfaceConfigError: If the repository or revision is not found. """ - try: - info = await to_thread.run_sync( - lambda: self._api.repo_info( - repo_id=self.config.repo_id, - repo_type=self.config.repo_type, - revision=self.config.revision, - ) - ) - return self.config.model_copy( - update={ - "original_revision": self.config.revision, - "revision": info.sha, - } - ) - except HfHubHTTPError as exc: - if exc.response is not None: - raise_for_hf_status( - exc.response.status_code, - dict(exc.response.headers), - str(exc.response.url), - ) - raise HuggingfaceBackendError(f"HuggingFace API error: {exc}") from exc + info = await _run_hf_request( + "resolve repository revision", + lambda: self._api.repo_info( + repo_id=self.config.repo_id, + repo_type=self.config.repo_type, + revision=self.config.revision, + ), + ) + return self.config.model_copy( + update={ + "original_revision": self.config.revision, + "revision": info.sha, + } + ) def _get_download_url(self, filepath: str) -> str: """Generate a download URL for a file in the Huggingface repo.""" @@ -162,14 +245,18 @@ def _get_download_url(self, filepath: str) -> str: async def _get_hf_file_metadata(self, filepath: str): """Get file metadata from Huggingface for a specific file.""" url = self._get_download_url(filepath) - return await to_thread.run_sync(lambda: get_hf_file_metadata(url=url, token=self.secrets.get("token"))) + return await _run_hf_request( + "get file metadata", + lambda: get_hf_file_metadata(url=url, token=self.secrets.get("token")), + ) async def list_files(self, path: str | None = None) -> list[FileInfo]: """List files in the Huggingface repository.""" try: # list_repo_tree returns RepoFile and RepoFolder objects # We filter for files only (items with size attribute) - items = await to_thread.run_sync( + items = await _run_hf_request( + "list repository tree", lambda: list( self._api.list_repo_tree( repo_id=self.config.repo_id, @@ -178,7 +265,7 @@ async def list_files(self, path: str | None = None) -> list[FileInfo]: path_in_repo=path, recursive=True, ) - ) + ), ) except EntryNotFoundError: # list_repo_tree expects a directory path. If path points to a file @@ -192,14 +279,6 @@ async def list_files(self, path: str | None = None) -> list[FileInfo]: # Neither a directory nor a file - return empty list return [] return [] - except HfHubHTTPError as exc: - if exc.response is not None: - raise_for_hf_status( - exc.response.status_code, - dict(exc.response.headers), - str(exc.response.url), - ) - raise HuggingfaceBackendError(f"HuggingFace API error: {exc}") from exc file_infos = [] for item in items: @@ -225,17 +304,12 @@ async def get_file(self, path: str) -> FileInfo: url = self._get_download_url(path) try: - metadata = await to_thread.run_sync(lambda: get_hf_file_metadata(url=url, token=self.secrets.get("token"))) + metadata = await _run_hf_request( + "get file metadata", + lambda: get_hf_file_metadata(url=url, token=self.secrets.get("token")), + ) except EntryNotFoundError as exc: raise NotFoundError(f"File '{path}' not found in {self.config.repo_id}@{self.config.revision}") from exc - except HfHubHTTPError as exc: - if exc.response is not None: - raise_for_hf_status( - exc.response.status_code, - dict(exc.response.headers), - str(exc.response.url), - ) - raise HuggingfaceBackendError(f"HuggingFace API error: {exc}") from exc return FileInfo(path=path, size=metadata.size) @@ -254,21 +328,37 @@ async def download(self, path: str, byte_range: ByteRange | None) -> AsyncIterat headers["Authorization"] = f"Bearer {self.secrets.get('token')}" async def _download() -> AsyncIterator[bytes]: - session = get_http_session() - try: - async for chunk in download_url_streaming( - url=download_url, - session=session, - headers=headers if headers else None, - byte_range=byte_range, - chunk_size=self.config.read_chunk_size, - ): - yield chunk - except aiohttp.ClientResponseError as exc: - response_headers = dict(exc.headers) if exc.headers else None - raise_for_hf_status(exc.status, response_headers, download_url) - except aiohttp.ClientError as exc: - raise HuggingfaceBackendError(f"Network error downloading file {path}") from exc + for attempt in range(1, _HF_TRANSIENT_RETRY_ATTEMPTS + 1): + session = get_http_session() + yielded = False + try: + async for chunk in download_url_streaming( + url=download_url, + session=session, + headers=headers if headers else None, + byte_range=byte_range, + chunk_size=self.config.read_chunk_size, + ): + yielded = True + yield chunk + return + except aiohttp.ClientResponseError as exc: + response_headers = dict(exc.headers) if exc.headers else None + try: + raise_for_hf_status(exc.status, response_headers, download_url) + except HuggingfaceUnavailableError as mapped: + if yielded or attempt >= _HF_TRANSIENT_RETRY_ATTEMPTS: + raise mapped from exc + await _sleep_before_retry( + operation="download file", + attempt=attempt, + headers=response_headers, + error=mapped, + ) + continue + raise + except aiohttp.ClientError as exc: + raise HuggingfaceBackendError(f"Network error downloading file {path}") from exc return _download() @@ -285,12 +375,13 @@ async def validate_storage(self): """ validate_external_host(self.config.endpoint) try: - repo_info = await to_thread.run_sync( + repo_info = await _run_hf_request( + "validate repository", lambda: self._api.repo_info( repo_id=self.config.repo_id, repo_type=self.config.repo_type, revision=self.config.revision, - ) + ), ) # Verify we can actually download files by checking a file's metadata. @@ -299,14 +390,13 @@ async def validate_storage(self): sibling = repo_info.siblings[0] await self._get_hf_file_metadata(sibling.rfilename) - except HfHubHTTPError as exc: - if exc.response is not None: - raise_for_hf_status( - exc.response.status_code, - dict(exc.response.headers), - str(exc.response.url), - ) - raise HuggingfaceBackendError(f"HuggingFace API error: {exc}") from exc + except ( + HuggingfaceAccessError, + HuggingfaceConfigError, + HuggingfaceUnavailableError, + HuggingfaceBackendError, + ): + raise except Exception as exc: raise HuggingfaceBackendError( f"Failed to access Huggingface repository {self.config.repo_id}@{self.config.revision}" diff --git a/services/core/files/tests/test_huggingface_backend.py b/services/core/files/tests/test_huggingface_backend.py index 629e1135fb..7acf123ad2 100644 --- a/services/core/files/tests/test_huggingface_backend.py +++ b/services/core/files/tests/test_huggingface_backend.py @@ -3,7 +3,7 @@ """Tests for Huggingface storage backend.""" -from unittest.mock import Mock, patch +from unittest.mock import AsyncMock, Mock, patch import aiohttp import httpx @@ -12,6 +12,7 @@ from nmp.core.files.app.backends.base import ByteRange from nmp.core.files.app.backends.factory import storage_impl_factory from nmp.core.files.app.backends.huggingface import ( + _HF_TRANSIENT_RETRY_ATTEMPTS, HuggingfaceAccessError, HuggingfaceBackendError, HuggingfaceConfigError, @@ -32,6 +33,17 @@ def _hf_http_error_without_response(message: str): return error +def _hf_http_error(status_code: int, message: str = "HuggingFace error", headers: dict[str, str] | None = None): + """Create a Hugging Face HTTP error with a mock response.""" + from huggingface_hub.utils import HfHubHTTPError + + response = Mock() + response.status_code = status_code + response.headers = headers or {} + response.url = "https://huggingface.co/test-org/test-repo" + return HfHubHTTPError(message, response=response) + + @pytest.fixture def mock_httpx_response(): """Create a mock httpx.Response for Huggingface exceptions.""" @@ -585,25 +597,22 @@ async def test_get_file_gated_repo_error(hf_config, mock_httpx_response, hf_secr async def test_get_file_rate_limit_error(hf_config, hf_secrets_empty): """Test get_file when rate limited by HuggingFace.""" - from huggingface_hub.utils import HfHubHTTPError - - # Create a mock response with 429 status code - mock_response = Mock() - mock_response.status_code = 429 - mock_response.headers = {} - mock_response.url = "https://huggingface.co/test-org/test-repo/test.txt" - - mock_error = HfHubHTTPError("Rate limited", response=mock_response) + mock_error = _hf_http_error(429, "Rate limited", headers={"Retry-After": "0"}) with patch("nmp.core.files.app.backends.huggingface.get_hf_file_metadata") as mock_metadata: mock_metadata.side_effect = mock_error impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) - with pytest.raises(HuggingfaceUnavailableError) as exc_info: + with ( + patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep, + pytest.raises(HuggingfaceUnavailableError) as exc_info, + ): await impl.get_file("test.txt") assert "Rate limited" in str(exc_info.value) + assert mock_metadata.call_count == _HF_TRANSIENT_RETRY_ATTEMPTS + assert mock_sleep.await_count == _HF_TRANSIENT_RETRY_ATTEMPTS - 1 async def test_validate_storage_gated_repo_error(hf_config, mock_hf_api, mock_httpx_response, hf_secrets_empty): @@ -750,6 +759,29 @@ async def test_resolve_config_tag_to_sha(mock_hf_api, hf_secrets_empty): assert resolved_config.revision == "def789abc123456" +async def test_resolve_config_retries_rate_limit_then_succeeds(mock_hf_api, hf_secrets_empty): + """Transient HuggingFace rate limits are retried during revision resolution.""" + mock_repo_info = Mock() + mock_repo_info.sha = "abc123def456789" + mock_hf_api.repo_info.side_effect = [ + _hf_http_error(429, "Rate limited", headers={"Retry-After": "0"}), + mock_repo_info, + ] + + impl = HuggingfaceStorageImpl( + HuggingfaceStorageConfig(repo_id="test-org/test-repo", repo_type="model", revision="main"), + hf_secrets_empty, + ) + + with patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep: + resolved_config = await impl.resolve_config() + + assert resolved_config.original_revision == "main" + assert resolved_config.revision == "abc123def456789" + assert mock_hf_api.repo_info.call_count == 2 + mock_sleep.assert_awaited_once_with(0.0) + + async def test_resolve_config_repo_not_found(hf_config, mock_hf_api, mock_httpx_response, hf_secrets_empty): """Test resolve_config raises HuggingfaceConfigError when repo not found.""" from huggingface_hub.utils import HfHubHTTPError From 042c44fafb7ab3c46d5be0f592b6711d386cc680 Mon Sep 17 00:00:00 2001 From: Matt Kornfield Date: Mon, 15 Jun 2026 17:48:40 +0000 Subject: [PATCH 2/3] chore: more hf fixes Signed-off-by: Matt Kornfield --- .github/workflows/ci.yaml | 6 + .../core/files/app/backends/huggingface.py | 103 ++++++++++++++++-- .../files/tests/test_huggingface_backend.py | 54 ++++++++- 3 files changed, 150 insertions(+), 13 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b2dca05e61..5c71922612 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -693,6 +693,9 @@ jobs: env: EXTRA: cpu HF_TOKEN: ${{ secrets.HF_TOKEN }} + NMP_FILES_HF_RETRY_ATTEMPTS: "7" + NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS: "1" + NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS: "30" PYTEST_WORKERS: "4" _TYPER_FORCE_DISABLE_TERMINAL: "1" - name: Upload test artifacts @@ -855,6 +858,9 @@ jobs: E2E_SERVICES_LOG_DIR: ${{ runner.temp }}/e2e-services-logs NGC_API_KEY: ${{ secrets.NGC_REGISTRY_READ_TOKEN }} HF_TOKEN: ${{ secrets.HF_TOKEN }} + NMP_FILES_HF_RETRY_ATTEMPTS: "7" + NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS: "1" + NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS: "30" - name: Dump server logs if: always() run: | diff --git a/services/core/files/src/nmp/core/files/app/backends/huggingface.py b/services/core/files/src/nmp/core/files/app/backends/huggingface.py index 6006f9364d..5e49014ed1 100644 --- a/services/core/files/src/nmp/core/files/app/backends/huggingface.py +++ b/services/core/files/src/nmp/core/files/app/backends/huggingface.py @@ -6,6 +6,7 @@ from __future__ import annotations import logging +import os from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field from datetime import datetime, timezone @@ -13,6 +14,7 @@ from typing import TypeVar import aiohttp +import httpx from anyio import sleep, to_thread from huggingface_hub import HfApi, get_hf_file_metadata, hf_hub_url from huggingface_hub.utils import ( @@ -46,6 +48,17 @@ _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS = 0.5 _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS = 5.0 +_HF_TRANSIENT_RETRY_ATTEMPTS_ENV = "NMP_FILES_HF_RETRY_ATTEMPTS" +_HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS_ENV = "NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS" +_HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS_ENV = "NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS" + + +@dataclass(frozen=True) +class _HfRetryConfig: + attempts: int + initial_delay_seconds: float + max_delay_seconds: float + class HuggingfaceBackendError(StorageBackendError): """Raised when there's issues talking to Huggingface.""" @@ -63,6 +76,44 @@ class HuggingfaceUnavailableError(StorageUnavailableError): """Raised when HuggingFace is unavailable (5xx, 429, timeout).""" +def _env_int(name: str, default: int, min_value: int = 1) -> int: + value = os.environ.get(name) + if value is None: + return default + try: + parsed = int(value) + except ValueError: + logger.warning("Invalid %s=%r; using default %s", name, value, default) + return default + return max(min_value, parsed) + + +def _env_float(name: str, default: float, min_value: float = 0.0) -> float: + value = os.environ.get(name) + if value is None: + return default + try: + parsed = float(value) + except ValueError: + logger.warning("Invalid %s=%r; using default %.2f", name, value, default) + return default + return max(min_value, parsed) + + +def _hf_retry_config() -> _HfRetryConfig: + return _HfRetryConfig( + attempts=_env_int(_HF_TRANSIENT_RETRY_ATTEMPTS_ENV, _HF_TRANSIENT_RETRY_ATTEMPTS), + initial_delay_seconds=_env_float( + _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS_ENV, + _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS, + ), + max_delay_seconds=_env_float( + _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS_ENV, + _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS, + ), + ) + + def raise_for_hf_status( status_code: int, headers: dict[str, str] | None = None, @@ -152,23 +203,24 @@ async def _sleep_before_retry( *, operation: str, attempt: int, + retry_config: _HfRetryConfig, headers: dict[str, str] | None, error: Exception, ) -> None: retry_after = _retry_after_seconds(headers) delay = ( - min(retry_after, _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS) + min(retry_after, retry_config.max_delay_seconds) if retry_after is not None else min( - _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS * (2 ** (attempt - 1)), - _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS, + retry_config.initial_delay_seconds * (2 ** (attempt - 1)), + retry_config.max_delay_seconds, ) ) logger.warning( "Transient HuggingFace error during %s; retrying attempt %s/%s after %.2fs: %s", operation, attempt + 1, - _HF_TRANSIENT_RETRY_ATTEMPTS, + retry_config.attempts, delay, error, ) @@ -176,16 +228,35 @@ async def _sleep_before_retry( async def _run_hf_request(operation: str, request: Callable[[], _T]) -> _T: - for attempt in range(1, _HF_TRANSIENT_RETRY_ATTEMPTS + 1): + retry_config = _hf_retry_config() + for attempt in range(1, retry_config.attempts + 1): try: return await to_thread.run_sync(request) except EntryNotFoundError: raise except HfHubHTTPError as exc: mapped = _map_hf_http_error(exc) - if isinstance(mapped, HuggingfaceUnavailableError) and attempt < _HF_TRANSIENT_RETRY_ATTEMPTS: + if isinstance(mapped, HuggingfaceUnavailableError) and attempt < retry_config.attempts: headers = dict(exc.response.headers) if exc.response is not None else None - await _sleep_before_retry(operation=operation, attempt=attempt, headers=headers, error=mapped) + await _sleep_before_retry( + operation=operation, + attempt=attempt, + retry_config=retry_config, + headers=headers, + error=mapped, + ) + continue + raise mapped from exc + except httpx.RequestError as exc: + mapped = HuggingfaceUnavailableError(f"Network error during {operation}: {exc}") + if attempt < retry_config.attempts: + await _sleep_before_retry( + operation=operation, + attempt=attempt, + retry_config=retry_config, + headers=None, + error=mapped, + ) continue raise mapped from exc @@ -328,7 +399,8 @@ async def download(self, path: str, byte_range: ByteRange | None) -> AsyncIterat headers["Authorization"] = f"Bearer {self.secrets.get('token')}" async def _download() -> AsyncIterator[bytes]: - for attempt in range(1, _HF_TRANSIENT_RETRY_ATTEMPTS + 1): + retry_config = _hf_retry_config() + for attempt in range(1, retry_config.attempts + 1): session = get_http_session() yielded = False try: @@ -347,18 +419,29 @@ async def _download() -> AsyncIterator[bytes]: try: raise_for_hf_status(exc.status, response_headers, download_url) except HuggingfaceUnavailableError as mapped: - if yielded or attempt >= _HF_TRANSIENT_RETRY_ATTEMPTS: + if yielded or attempt >= retry_config.attempts: raise mapped from exc await _sleep_before_retry( operation="download file", attempt=attempt, + retry_config=retry_config, headers=response_headers, error=mapped, ) continue raise except aiohttp.ClientError as exc: - raise HuggingfaceBackendError(f"Network error downloading file {path}") from exc + mapped = HuggingfaceUnavailableError(f"Network error downloading file {path}: {exc}") + if yielded or attempt >= retry_config.attempts: + raise mapped from exc + await _sleep_before_retry( + operation="download file", + attempt=attempt, + retry_config=retry_config, + headers=None, + error=mapped, + ) + continue return _download() diff --git a/services/core/files/tests/test_huggingface_backend.py b/services/core/files/tests/test_huggingface_backend.py index 7acf123ad2..27e3f306a6 100644 --- a/services/core/files/tests/test_huggingface_backend.py +++ b/services/core/files/tests/test_huggingface_backend.py @@ -3,7 +3,7 @@ """Tests for Huggingface storage backend.""" -from unittest.mock import AsyncMock, Mock, patch +from unittest.mock import AsyncMock, Mock, call, patch import aiohttp import httpx @@ -163,6 +163,26 @@ async def test_list_files_no_filter(hf_config, mock_hf_api, hf_secrets_empty): ) +async def test_list_files_network_error_retries_then_succeeds(hf_config, mock_hf_api, hf_secrets_empty): + """Transient HuggingFace transport errors are retried.""" + mock_file = Mock() + mock_file.path = "file1.txt" + mock_file.size = 100 + mock_hf_api.list_repo_tree.side_effect = [ + httpx.ConnectError("Connection reset by peer"), + [mock_file], + ] + + impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) + + with patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep: + files = await impl.list_files() + + assert files[0].path == "file1.txt" + assert mock_hf_api.list_repo_tree.call_count == 2 + mock_sleep.assert_awaited_once_with(0.5) + + async def test_list_files_with_path_filter(hf_config, mock_hf_api, hf_secrets_empty): """Test listing files with path filter.""" mock_file1 = Mock() @@ -436,16 +456,21 @@ async def mock_chunks(): raise aiohttp.ClientError("Connection failed") yield # pragma: no cover - mock_stream.return_value = mock_chunks() + mock_stream.side_effect = lambda *args, **kwargs: mock_chunks() impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) download_iter = await impl.download("test.txt", None) - with pytest.raises(HuggingfaceBackendError) as exc_info: + with ( + patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep, + pytest.raises(HuggingfaceUnavailableError) as exc_info, + ): async for _ in download_iter: pass assert "Network error" in str(exc_info.value) + assert mock_stream.call_count == _HF_TRANSIENT_RETRY_ATTEMPTS + assert mock_sleep.await_count == _HF_TRANSIENT_RETRY_ATTEMPTS - 1 async def test_validate_storage_success(hf_config, mock_hf_api, hf_secrets_empty): @@ -615,6 +640,29 @@ async def test_get_file_rate_limit_error(hf_config, hf_secrets_empty): assert mock_sleep.await_count == _HF_TRANSIENT_RETRY_ATTEMPTS - 1 +async def test_get_file_rate_limit_uses_retry_env_override(hf_config, hf_secrets_empty, monkeypatch): + """HF retry attempts and exponential delays can be increased by environment.""" + monkeypatch.setenv("NMP_FILES_HF_RETRY_ATTEMPTS", "3") + monkeypatch.setenv("NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS", "2") + monkeypatch.setenv("NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS", "3") + + mock_error = _hf_http_error(429, "Rate limited") + + with patch("nmp.core.files.app.backends.huggingface.get_hf_file_metadata") as mock_metadata: + mock_metadata.side_effect = mock_error + + impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) + + with ( + patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep, + pytest.raises(HuggingfaceUnavailableError), + ): + await impl.get_file("test.txt") + + assert mock_metadata.call_count == 3 + mock_sleep.assert_has_awaits([call(2.0), call(3.0)]) + + async def test_validate_storage_gated_repo_error(hf_config, mock_hf_api, mock_httpx_response, hf_secrets_empty): """Test storage validation when gated repo access is denied.""" from huggingface_hub.utils import GatedRepoError From fbac457f7954486ae8782b096c14cffe12e05df4 Mon Sep 17 00:00:00 2001 From: Matt Kornfield Date: Tue, 16 Jun 2026 13:52:14 +0000 Subject: [PATCH 3/3] chore: lint fixes Signed-off-by: Matt Kornfield --- docs/set-up/config-reference.mdx | 6 ++ .../nemo-platform/.nmpcontext/stainless.yaml | 2 + .../src/nemo_platform/resources/files/api.md | 2 +- .../nemo_platform/resources/files/filesets.py | 1 - .../src/nemo_platform/types/__init__.py | 1 + .../src/nemo_platform/types/files/__init__.py | 2 - .../src/nemo_platform/types/files/fileset.py | 2 +- .../types/files/fileset_create_params.py | 1 - .../types/files/fileset_metadata_param.py | 47 ----------- .../nemo_platform/types/shared/__init__.py | 1 + .../{files => shared}/fileset_metadata.py | 4 +- .../shared_params/fileset_metadata_param.py | 4 +- sdk/stainless.yaml | 2 + .../nmp/core/auth/assets/static-authz.yaml | 1 + .../core/files/app/backends/huggingface.py | 83 ++++--------------- .../core/files/src/nmp/core/files/config.py | 18 ++++ .../core/files/tests/test_files_config.py | 52 ++++++++++++ .../files/tests/test_huggingface_backend.py | 44 ++++++---- 18 files changed, 132 insertions(+), 141 deletions(-) delete mode 100644 sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py rename sdk/python/nemo-platform/src/nemo_platform/types/{files => shared}/fileset_metadata.py (91%) diff --git a/docs/set-up/config-reference.mdx b/docs/set-up/config-reference.mdx index dc79ce0b2a..ca7adcaed3 100644 --- a/docs/set-up/config-reference.mdx +++ b/docs/set-up/config-reference.mdx @@ -188,6 +188,12 @@ files: file_lock_ttl_seconds: 300 # Maximum concurrent downloads during cache warming | default: 3 cache_warming_max_concurrent: 3 + # Maximum Hugging Face request attempts for transient failures. | default: 4 + hf_retry_attempts: 4 + # Initial Hugging Face retry delay in seconds before exponential backoff. | default: 0.5 + hf_retry_initial_delay_seconds: 0.5 + # Maximum Hugging Face retry delay in seconds. | default: 5.0 + hf_retry_max_delay_seconds: 5.0 ``` ### `inference_gateway` diff --git a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml index fcbb5c2a58..f020e7355d 100644 --- a/sdk/python/nemo-platform/.nmpcontext/stainless.yaml +++ b/sdk/python/nemo-platform/.nmpcontext/stainless.yaml @@ -927,3 +927,5 @@ resources: experiment_session_responses_page: ExperimentSessionResponsesPage methods: list: get /apis/intake/v2/workspaces/{workspace}/experiments/{name}/sessions + methods: + create: post /apis/auth/v2/authz/{entrypoint} diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md index 882f649add..72e7b5ca66 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/api.md @@ -33,7 +33,7 @@ Methods: Types: ```python -from nemo_platform.types.files import FilesetFilter, FilesetMetadata, FilesetMetadataParam +from nemo_platform.types.files import FilesetFilter ``` Methods: diff --git a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py index 2fbd9935dc..9b7afcb95f 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py +++ b/sdk/python/nemo-platform/src/nemo_platform/resources/files/filesets.py @@ -34,7 +34,6 @@ from ...pagination import SyncDefaultPagination, AsyncDefaultPagination from ...types.files import ( FilesetPurpose, - FilesetMetadataParam, fileset_list_params, fileset_create_params, fileset_update_params, diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py index 571b87927b..eb3af5c4f7 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/__init__.py @@ -32,6 +32,7 @@ PlatformJobLog as PlatformJobLog, ToolCallConfig as ToolCallConfig, APIEndpointData as APIEndpointData, + FilesetMetadata as FilesetMetadata, FileStorageType as FileStorageType, InferenceParams as InferenceParams, LinearLayerSpec as LinearLayerSpec, diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py index b76dd4a694..3833c1d785 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/__init__.py @@ -22,7 +22,6 @@ from .cache_status import CacheStatus as CacheStatus from .fileset_file import FilesetFile as FilesetFile from .fileset_purpose import FilesetPurpose as FilesetPurpose -from .fileset_metadata import FilesetMetadata as FilesetMetadata from .s3_storage_config import S3StorageConfig as S3StorageConfig from .ngc_storage_config import NGCStorageConfig as NGCStorageConfig from .fileset_list_params import FilesetListParams as FilesetListParams @@ -33,7 +32,6 @@ from .fileset_create_params import FilesetCreateParams as FilesetCreateParams from .fileset_update_params import FilesetUpdateParams as FilesetUpdateParams from .file_list_files_params import FileListFilesParams as FileListFilesParams -from .fileset_metadata_param import FilesetMetadataParam as FilesetMetadataParam from .file_upload_file_params import FileUploadFileParams as FileUploadFileParams from .s3_storage_config_param import S3StorageConfigParam as S3StorageConfigParam from .ngc_storage_config_param import NGCStorageConfigParam as NGCStorageConfigParam diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py index e6d9642b7a..810d5ce990 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset.py @@ -20,10 +20,10 @@ from ..._models import BaseModel from .fileset_purpose import FilesetPurpose -from .fileset_metadata import FilesetMetadata from .s3_storage_config import S3StorageConfig from .ngc_storage_config import NGCStorageConfig from .local_storage_config import LocalStorageConfig +from ..shared.fileset_metadata import FilesetMetadata from .huggingface_storage_config import HuggingfaceStorageConfig __all__ = ["Fileset", "Storage"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py index ea3cb763f7..9836fcb477 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_create_params.py @@ -21,7 +21,6 @@ from typing_extensions import Required, TypeAlias, TypedDict from .fileset_purpose import FilesetPurpose -from .fileset_metadata_param import FilesetMetadataParam from .s3_storage_config_param import S3StorageConfigParam from .ngc_storage_config_param import NGCStorageConfigParam from .local_storage_config_param import LocalStorageConfigParam diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py deleted file mode 100644 index 66f37de921..0000000000 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata_param.py +++ /dev/null @@ -1,47 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import TypedDict - -from ..shared_params.model_metadata_content import ModelMetadataContent -from ..shared_params.dataset_metadata_content import DatasetMetadataContent - -__all__ = ["FilesetMetadataParam"] - - -class FilesetMetadataParam(TypedDict, total=False): - """Tagged metadata container - the key indicates the type. - - Example: - metadata = FilesetMetadata( - dataset=DatasetMetadataContent( - schema={"columns": ["id", "name"]}, - ) - ) - """ - - dataset: DatasetMetadataContent - """Content for dataset-type filesets.""" - - model: ModelMetadataContent - """Content for model-type filesets. - - Contains tool calling configuration that is merged into the ModelSpec during - checkpoint analysis. - """ diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py index 70ea9bdc92..d16fead87f 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared/__init__.py @@ -26,6 +26,7 @@ from .delete_response import DeleteResponse as DeleteResponse from .finetuning_type import FinetuningType as FinetuningType from .pagination_data import PaginationData as PaginationData +from .fileset_metadata import FilesetMetadata as FilesetMetadata from .inference_params import InferenceParams as InferenceParams from .platform_job_log import PlatformJobLog as PlatformJobLog from .tool_call_config import ToolCallConfig as ToolCallConfig diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py similarity index 91% rename from sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py rename to sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py index 36573bd374..b35b6d8ecc 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/files/fileset_metadata.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared/fileset_metadata.py @@ -18,8 +18,8 @@ from typing import Optional from ..._models import BaseModel -from ..shared.model_metadata_content import ModelMetadataContent -from ..shared.dataset_metadata_content import DatasetMetadataContent +from .model_metadata_content import ModelMetadataContent +from .dataset_metadata_content import DatasetMetadataContent __all__ = ["FilesetMetadata"] diff --git a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata_param.py b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata_param.py index 66f37de921..e3f510ca6e 100644 --- a/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata_param.py +++ b/sdk/python/nemo-platform/src/nemo_platform/types/shared_params/fileset_metadata_param.py @@ -19,8 +19,8 @@ from typing_extensions import TypedDict -from ..shared_params.model_metadata_content import ModelMetadataContent -from ..shared_params.dataset_metadata_content import DatasetMetadataContent +from .model_metadata_content import ModelMetadataContent +from .dataset_metadata_content import DatasetMetadataContent __all__ = ["FilesetMetadataParam"] diff --git a/sdk/stainless.yaml b/sdk/stainless.yaml index fcbb5c2a58..f020e7355d 100644 --- a/sdk/stainless.yaml +++ b/sdk/stainless.yaml @@ -927,3 +927,5 @@ resources: experiment_session_responses_page: ExperimentSessionResponsesPage methods: list: get /apis/intake/v2/workspaces/{workspace}/experiments/{name}/sessions + methods: + create: post /apis/auth/v2/authz/{entrypoint} diff --git a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml index 2b40eb7fb8..23905c8771 100644 --- a/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml +++ b/services/core/auth/src/nmp/core/auth/assets/static-authz.yaml @@ -321,6 +321,7 @@ authz: description: "Read and write access to workspace resources" includes: ["Viewer"] permissions: + - auth.authz.create - filesets.create - filesets.delete - filesets.update diff --git a/services/core/files/src/nmp/core/files/app/backends/huggingface.py b/services/core/files/src/nmp/core/files/app/backends/huggingface.py index 5e49014ed1..38b601f64d 100644 --- a/services/core/files/src/nmp/core/files/app/backends/huggingface.py +++ b/services/core/files/src/nmp/core/files/app/backends/huggingface.py @@ -6,7 +6,6 @@ from __future__ import annotations import logging -import os from collections.abc import AsyncIterator, Callable from dataclasses import dataclass, field from datetime import datetime, timezone @@ -32,6 +31,7 @@ from nmp.core.files.app.external_hosts import validate_external_host from nmp.core.files.app.http_session import get_http_session from nmp.core.files.app.streaming import download_url_streaming +from nmp.core.files.config import FilesConfig, files_config from nmp.core.files.exceptions import ( NotFoundError, StorageAccessError, @@ -44,21 +44,6 @@ _T = TypeVar("_T") -_HF_TRANSIENT_RETRY_ATTEMPTS = 4 -_HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS = 0.5 -_HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS = 5.0 - -_HF_TRANSIENT_RETRY_ATTEMPTS_ENV = "NMP_FILES_HF_RETRY_ATTEMPTS" -_HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS_ENV = "NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS" -_HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS_ENV = "NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS" - - -@dataclass(frozen=True) -class _HfRetryConfig: - attempts: int - initial_delay_seconds: float - max_delay_seconds: float - class HuggingfaceBackendError(StorageBackendError): """Raised when there's issues talking to Huggingface.""" @@ -76,44 +61,6 @@ class HuggingfaceUnavailableError(StorageUnavailableError): """Raised when HuggingFace is unavailable (5xx, 429, timeout).""" -def _env_int(name: str, default: int, min_value: int = 1) -> int: - value = os.environ.get(name) - if value is None: - return default - try: - parsed = int(value) - except ValueError: - logger.warning("Invalid %s=%r; using default %s", name, value, default) - return default - return max(min_value, parsed) - - -def _env_float(name: str, default: float, min_value: float = 0.0) -> float: - value = os.environ.get(name) - if value is None: - return default - try: - parsed = float(value) - except ValueError: - logger.warning("Invalid %s=%r; using default %.2f", name, value, default) - return default - return max(min_value, parsed) - - -def _hf_retry_config() -> _HfRetryConfig: - return _HfRetryConfig( - attempts=_env_int(_HF_TRANSIENT_RETRY_ATTEMPTS_ENV, _HF_TRANSIENT_RETRY_ATTEMPTS), - initial_delay_seconds=_env_float( - _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS_ENV, - _HF_TRANSIENT_RETRY_INITIAL_DELAY_SECONDS, - ), - max_delay_seconds=_env_float( - _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS_ENV, - _HF_TRANSIENT_RETRY_MAX_DELAY_SECONDS, - ), - ) - - def raise_for_hf_status( status_code: int, headers: dict[str, str] | None = None, @@ -203,24 +150,24 @@ async def _sleep_before_retry( *, operation: str, attempt: int, - retry_config: _HfRetryConfig, + retry_config: FilesConfig, headers: dict[str, str] | None, error: Exception, ) -> None: retry_after = _retry_after_seconds(headers) delay = ( - min(retry_after, retry_config.max_delay_seconds) + min(retry_after, retry_config.hf_retry_max_delay_seconds) if retry_after is not None else min( - retry_config.initial_delay_seconds * (2 ** (attempt - 1)), - retry_config.max_delay_seconds, + retry_config.hf_retry_initial_delay_seconds * (2 ** (attempt - 1)), + retry_config.hf_retry_max_delay_seconds, ) ) logger.warning( "Transient HuggingFace error during %s; retrying attempt %s/%s after %.2fs: %s", operation, attempt + 1, - retry_config.attempts, + retry_config.hf_retry_attempts, delay, error, ) @@ -228,15 +175,15 @@ async def _sleep_before_retry( async def _run_hf_request(operation: str, request: Callable[[], _T]) -> _T: - retry_config = _hf_retry_config() - for attempt in range(1, retry_config.attempts + 1): + retry_config = files_config() + for attempt in range(1, retry_config.hf_retry_attempts + 1): try: return await to_thread.run_sync(request) except EntryNotFoundError: raise except HfHubHTTPError as exc: mapped = _map_hf_http_error(exc) - if isinstance(mapped, HuggingfaceUnavailableError) and attempt < retry_config.attempts: + if isinstance(mapped, HuggingfaceUnavailableError) and attempt < retry_config.hf_retry_attempts: headers = dict(exc.response.headers) if exc.response is not None else None await _sleep_before_retry( operation=operation, @@ -249,7 +196,7 @@ async def _run_hf_request(operation: str, request: Callable[[], _T]) -> _T: raise mapped from exc except httpx.RequestError as exc: mapped = HuggingfaceUnavailableError(f"Network error during {operation}: {exc}") - if attempt < retry_config.attempts: + if attempt < retry_config.hf_retry_attempts: await _sleep_before_retry( operation=operation, attempt=attempt, @@ -260,7 +207,7 @@ async def _run_hf_request(operation: str, request: Callable[[], _T]) -> _T: continue raise mapped from exc - raise HuggingfaceBackendError(f"HuggingFace API error during {operation}") + raise AssertionError("unreachable in _run_hf_request") @dataclass @@ -399,8 +346,8 @@ async def download(self, path: str, byte_range: ByteRange | None) -> AsyncIterat headers["Authorization"] = f"Bearer {self.secrets.get('token')}" async def _download() -> AsyncIterator[bytes]: - retry_config = _hf_retry_config() - for attempt in range(1, retry_config.attempts + 1): + retry_config = files_config() + for attempt in range(1, retry_config.hf_retry_attempts + 1): session = get_http_session() yielded = False try: @@ -419,7 +366,7 @@ async def _download() -> AsyncIterator[bytes]: try: raise_for_hf_status(exc.status, response_headers, download_url) except HuggingfaceUnavailableError as mapped: - if yielded or attempt >= retry_config.attempts: + if yielded or attempt >= retry_config.hf_retry_attempts: raise mapped from exc await _sleep_before_retry( operation="download file", @@ -432,7 +379,7 @@ async def _download() -> AsyncIterator[bytes]: raise except aiohttp.ClientError as exc: mapped = HuggingfaceUnavailableError(f"Network error downloading file {path}: {exc}") - if yielded or attempt >= retry_config.attempts: + if yielded or attempt >= retry_config.hf_retry_attempts: raise mapped from exc await _sleep_before_retry( operation="download file", diff --git a/services/core/files/src/nmp/core/files/config.py b/services/core/files/src/nmp/core/files/config.py index de8a549a82..0dc1596a0e 100644 --- a/services/core/files/src/nmp/core/files/config.py +++ b/services/core/files/src/nmp/core/files/config.py @@ -61,6 +61,24 @@ def validate_allowed_external_hosts(self) -> "FilesConfig": description="Maximum concurrent downloads during cache warming", ) + hf_retry_attempts: int = Field( + default=4, + ge=1, + description="Maximum Hugging Face request attempts for transient failures.", + ) + + hf_retry_initial_delay_seconds: float = Field( + default=0.5, + ge=0.0, + description="Initial Hugging Face retry delay in seconds before exponential backoff.", + ) + + hf_retry_max_delay_seconds: float = Field( + default=5.0, + ge=0.0, + description="Maximum Hugging Face retry delay in seconds.", + ) + # TODO(v2): CONFIG @cache diff --git a/services/core/files/tests/test_files_config.py b/services/core/files/tests/test_files_config.py index 569355511c..d2b3f54e1e 100644 --- a/services/core/files/tests/test_files_config.py +++ b/services/core/files/tests/test_files_config.py @@ -67,3 +67,55 @@ def test_allowed_external_hosts_missing_netloc_raises(self) -> None: """Test that URL without scheme/netloc raises ValueError from model validator.""" with pytest.raises(ValueError, match="must be a valid URL"): FilesConfig(allowed_external_hosts="https://valid.com,not-a-url") + + +class TestFilesConfigHuggingFaceRetries: + """Tests for Hugging Face retry configuration.""" + + def test_hf_retry_defaults(self) -> None: + """Test Hugging Face retry default values.""" + config = FilesConfig() + assert config.hf_retry_attempts == 4 + assert config.hf_retry_initial_delay_seconds == 0.5 + assert config.hf_retry_max_delay_seconds == 5.0 + + def test_hf_retry_from_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Test NMP_FILES_HF_RETRY_* env vars set Hugging Face retry config.""" + monkeypatch.setenv("NMP_FILES_HF_RETRY_ATTEMPTS", "7") + monkeypatch.setenv("NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS", "1") + monkeypatch.setenv("NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS", "30") + + config = FilesConfig() + + assert config.hf_retry_attempts == 7 + assert config.hf_retry_initial_delay_seconds == 1.0 + assert config.hf_retry_max_delay_seconds == 30.0 + + def test_hf_retry_from_yaml(self) -> None: + """Test files.hf_retry_* values load from YAML settings.""" + settings = { + "files": { + "hf_retry_attempts": 6, + "hf_retry_initial_delay_seconds": 2.5, + "hf_retry_max_delay_seconds": 20, + }, + } + + config = Configuration.global_settings_to_service_config(settings, FilesConfig) + + assert config.hf_retry_attempts == 6 + assert config.hf_retry_initial_delay_seconds == 2.5 + assert config.hf_retry_max_delay_seconds == 20.0 + + def test_hf_retry_invalid_values_raise(self) -> None: + """Test invalid Hugging Face retry values fail validation.""" + from pydantic import ValidationError + + with pytest.raises(ValidationError): + FilesConfig(hf_retry_attempts=0) + + with pytest.raises(ValidationError): + FilesConfig(hf_retry_initial_delay_seconds=-1) + + with pytest.raises(ValidationError): + FilesConfig(hf_retry_max_delay_seconds=-1) diff --git a/services/core/files/tests/test_huggingface_backend.py b/services/core/files/tests/test_huggingface_backend.py index 27e3f306a6..b0cd8d4171 100644 --- a/services/core/files/tests/test_huggingface_backend.py +++ b/services/core/files/tests/test_huggingface_backend.py @@ -9,10 +9,10 @@ import httpx import pytest from nmp.common.api.common import SecretRef +from nmp.common.config import Configuration from nmp.core.files.app.backends.base import ByteRange from nmp.core.files.app.backends.factory import storage_impl_factory from nmp.core.files.app.backends.huggingface import ( - _HF_TRANSIENT_RETRY_ATTEMPTS, HuggingfaceAccessError, HuggingfaceBackendError, HuggingfaceConfigError, @@ -21,6 +21,12 @@ HuggingfaceUnavailableError, raise_for_hf_status, ) +from nmp.core.files.config import FilesConfig, files_config + + +def _clear_files_config_cache() -> None: + Configuration.clear_cache() + files_config.cache_clear() def _hf_http_error_without_response(message: str): @@ -469,8 +475,9 @@ async def mock_chunks(): pass assert "Network error" in str(exc_info.value) - assert mock_stream.call_count == _HF_TRANSIENT_RETRY_ATTEMPTS - assert mock_sleep.await_count == _HF_TRANSIENT_RETRY_ATTEMPTS - 1 + retry_attempts = FilesConfig().hf_retry_attempts + assert mock_stream.call_count == retry_attempts + assert mock_sleep.await_count == retry_attempts - 1 async def test_validate_storage_success(hf_config, mock_hf_api, hf_secrets_empty): @@ -636,8 +643,9 @@ async def test_get_file_rate_limit_error(hf_config, hf_secrets_empty): await impl.get_file("test.txt") assert "Rate limited" in str(exc_info.value) - assert mock_metadata.call_count == _HF_TRANSIENT_RETRY_ATTEMPTS - assert mock_sleep.await_count == _HF_TRANSIENT_RETRY_ATTEMPTS - 1 + retry_attempts = FilesConfig().hf_retry_attempts + assert mock_metadata.call_count == retry_attempts + assert mock_sleep.await_count == retry_attempts - 1 async def test_get_file_rate_limit_uses_retry_env_override(hf_config, hf_secrets_empty, monkeypatch): @@ -645,22 +653,26 @@ async def test_get_file_rate_limit_uses_retry_env_override(hf_config, hf_secrets monkeypatch.setenv("NMP_FILES_HF_RETRY_ATTEMPTS", "3") monkeypatch.setenv("NMP_FILES_HF_RETRY_INITIAL_DELAY_SECONDS", "2") monkeypatch.setenv("NMP_FILES_HF_RETRY_MAX_DELAY_SECONDS", "3") + _clear_files_config_cache() - mock_error = _hf_http_error(429, "Rate limited") + try: + mock_error = _hf_http_error(429, "Rate limited") - with patch("nmp.core.files.app.backends.huggingface.get_hf_file_metadata") as mock_metadata: - mock_metadata.side_effect = mock_error + with patch("nmp.core.files.app.backends.huggingface.get_hf_file_metadata") as mock_metadata: + mock_metadata.side_effect = mock_error - impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) + impl = HuggingfaceStorageImpl(hf_config, hf_secrets_empty) - with ( - patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep, - pytest.raises(HuggingfaceUnavailableError), - ): - await impl.get_file("test.txt") + with ( + patch("nmp.core.files.app.backends.huggingface.sleep", new_callable=AsyncMock) as mock_sleep, + pytest.raises(HuggingfaceUnavailableError), + ): + await impl.get_file("test.txt") - assert mock_metadata.call_count == 3 - mock_sleep.assert_has_awaits([call(2.0), call(3.0)]) + assert mock_metadata.call_count == 3 + mock_sleep.assert_has_awaits([call(2.0), call(3.0)]) + finally: + _clear_files_config_cache() async def test_validate_storage_gated_repo_error(hf_config, mock_hf_api, mock_httpx_response, hf_secrets_empty):