From 72e824e53da7369c0a11b58e445e3285f31879eb Mon Sep 17 00:00:00 2001 From: Khaled Osman <2937633+khaledosman@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:03:36 +0200 Subject: [PATCH 1/2] fix(usage): treat 402 from the usage-report endpoint as non-retryable A 402 (payment required / insufficient balance) returned by the platform's /gateway/usage endpoint is a permanent rejection: an overdrawn or missing org wallet won't recover within the retry window, so retrying only hammers the platform. Add 402 to _USAGE_NON_RETRYABLE_STATUS_CODES alongside 401/404/409/422. This is behavior-preserving today (402 is already excluded by the >= 500 retry predicate), but makes the intent explicit and keeps the usage reporter robust to future changes in that predicate. Add a unit test asserting a single POST with no retry on 402. Created with Claude Code. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/gateway/api/routes/_platform.py | 9 ++++--- tests/unit/test_run_platform_attempts.py | 32 +++++++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/gateway/api/routes/_platform.py b/src/gateway/api/routes/_platform.py index 11086d25..87c27224 100644 --- a/src/gateway/api/routes/_platform.py +++ b/src/gateway/api/routes/_platform.py @@ -30,9 +30,12 @@ T = TypeVar("T") # Status codes returned by the platform's usage-report endpoint that the -# gateway should NOT retry. Auth / not-found / conflict / unprocessable are -# all permanent rejection signals — retrying would just hammer the platform. -_USAGE_NON_RETRYABLE_STATUS_CODES = {401, 404, 409, 422} +# gateway should NOT retry. Auth / payment-required / not-found / conflict / +# unprocessable are all permanent rejection signals — retrying would just +# hammer the platform (an overdrawn or missing wallet won't recover within the +# retry window). 402 is already excluded by the >= 500 retry predicate below; +# listing it keeps the intent explicit and robust to changes in that predicate. +_USAGE_NON_RETRYABLE_STATUS_CODES = {401, 402, 404, 409, 422} # Status codes that cause the gateway to move on to the next attempt in a # multi-attempt route. 401/403 are included because users configure diff --git a/tests/unit/test_run_platform_attempts.py b/tests/unit/test_run_platform_attempts.py index d56e98a7..4641690f 100644 --- a/tests/unit/test_run_platform_attempts.py +++ b/tests/unit/test_run_platform_attempts.py @@ -7,12 +7,18 @@ from __future__ import annotations -from typing import Any +import asyncio +from types import SimpleNamespace +from typing import Any, cast +from unittest.mock import AsyncMock +import httpx import pytest from fastapi import HTTPException +from gateway.api.routes import _platform from gateway.api.routes._platform import ResolvedRoute, run_platform_attempts +from gateway.core.config import GatewayConfig @pytest.mark.asyncio @@ -43,3 +49,27 @@ async def _never_called(_kwargs: dict[str, Any], _on_first_response: Any) -> Any ) assert ei.value.status_code == 500 assert "empty attempts list" in ei.value.detail + + +@pytest.mark.asyncio +async def test_report_platform_usage_does_not_retry_on_402(monkeypatch: pytest.MonkeyPatch) -> None: + """A 402 from the usage-report endpoint is a permanent rejection (the org + wallet is overdrawn or missing and won't recover within the retry window). + The gateway must POST once and give up, never retry.""" + config = cast( + GatewayConfig, + SimpleNamespace( + platform={"base_url": "http://platform", "usage_max_retries": 3}, + platform_token="gw-test", + ), + ) + + post_mock = AsyncMock(return_value=httpx.Response(402)) + monkeypatch.setattr(_platform, "_post_platform", post_mock) + sleep_mock = AsyncMock() + monkeypatch.setattr(asyncio, "sleep", sleep_mock) + + await _platform._report_platform_usage(config, "corr-1", "success", None) + + assert post_mock.call_count == 1 + sleep_mock.assert_not_awaited() From 21b121ba3b662a3b0106395cc2e5406a992da5c9 Mon Sep 17 00:00:00 2001 From: Khaled Osman <2937633+khaledosman@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:17:56 +0200 Subject: [PATCH 2/2] docs(usage): note payment-required in non-retryable docstring; pin 402 in test Address review feedback on the usage-report 402 change: - Update the _report_platform_usage docstring to list payment-required among the non-retryable status codes, matching _USAGE_NON_RETRYABLE_STATUS_CODES. - Assert 402 is in the non-retryable set so the unit test guards the classification itself, not just the (currently equivalent) >= 500 retry behaviour. Created with Claude Code. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/gateway/api/routes/_platform.py | 3 ++- tests/unit/test_run_platform_attempts.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gateway/api/routes/_platform.py b/src/gateway/api/routes/_platform.py index 87c27224..e7146481 100644 --- a/src/gateway/api/routes/_platform.py +++ b/src/gateway/api/routes/_platform.py @@ -587,7 +587,8 @@ async def _report_platform_usage( Best-effort — failures are swallowed after ``max_retries`` so they don't impact the user's response path. Non-retryable status codes (auth / - not-found / conflict / unprocessable) short-circuit the retry loop. + payment-required / not-found / conflict / unprocessable) short-circuit the + retry loop. """ platform_base_url = config.platform.get("base_url") if not platform_base_url: diff --git a/tests/unit/test_run_platform_attempts.py b/tests/unit/test_run_platform_attempts.py index 4641690f..b95c351e 100644 --- a/tests/unit/test_run_platform_attempts.py +++ b/tests/unit/test_run_platform_attempts.py @@ -73,3 +73,7 @@ async def test_report_platform_usage_does_not_retry_on_402(monkeypatch: pytest.M assert post_mock.call_count == 1 sleep_mock.assert_not_awaited() + # Pin the classification itself, not just the (currently equivalent) retry + # behaviour: 402 must stay in the non-retryable set even if the >= 500 retry + # predicate changes. + assert 402 in _platform._USAGE_NON_RETRYABLE_STATUS_CODES