From 3b469d95758a319ee626ea734978c3dfc8c65f80 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Fri, 24 Apr 2026 17:40:09 +0800 Subject: [PATCH 1/2] refactor(config): inline DEFAULT_MAX_TOKENS as 16_000 Used in exactly one place; the named constant obscures the tier table that already lives inline with 64_000 and 32_000. Drop the constant, rebase the default to 16_000 for consistency with the other tiers, and trim narrative references from the `default_max_tokens` doc and a test comment. --- crates/oxide-code/src/config.rs | 21 ++++++++------------- docs/guide/configuration.md | 2 +- docs/research/anthropic-api.md | 2 +- 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index 986d4bc..f17e53b 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -18,7 +18,6 @@ use crate::util::env; const DEFAULT_MODEL: &str = "claude-opus-4-7"; const DEFAULT_BASE_URL: &str = "https://api.anthropic.com"; -const DEFAULT_MAX_TOKENS: u32 = 16384; #[derive(Debug, Clone)] pub enum Auth { @@ -243,15 +242,13 @@ impl Config { } } -/// Per-effort `max_tokens` default. Matches claude-code 2.1.119's -/// observed values: 64 K for the top two tiers (xhigh / max), 32 K -/// for high, the legacy 16 384 for everything else. Users override -/// via `ANTHROPIC_MAX_TOKENS` / `[client].max_tokens`. +/// Per-effort `max_tokens` default; overridden by +/// `ANTHROPIC_MAX_TOKENS` / `[client].max_tokens`. fn default_max_tokens(effort: Option) -> u32 { match effort { Some(Effort::Xhigh | Effort::Max) => 64_000, Some(Effort::High) => 32_000, - _ => DEFAULT_MAX_TOKENS, + _ => 16_000, } } @@ -416,10 +413,8 @@ mod tests { #[tokio::test] async fn load_defaults_apply_when_no_config_and_no_env() { - // Default model (Opus 4.7) supports `xhigh`, so both `effort` - // and `max_tokens` derive from that ceiling — matches the - // claude-code 2.1.119 packet capture. Prompt cache defaults - // to 1h (opt-out via `OX_PROMPT_CACHE_TTL=5m`). + // Opus 4.7 supports `xhigh`, so both `effort` and `max_tokens` + // derive from that ceiling. Prompt cache defaults to 1h. let dir = tempfile::tempdir().unwrap(); let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await @@ -713,9 +708,9 @@ mod tests { assert_eq!(default_max_tokens(Some(Effort::Max)), 64_000); assert_eq!(default_max_tokens(Some(Effort::Xhigh)), 64_000); assert_eq!(default_max_tokens(Some(Effort::High)), 32_000); - assert_eq!(default_max_tokens(Some(Effort::Medium)), DEFAULT_MAX_TOKENS); - assert_eq!(default_max_tokens(Some(Effort::Low)), DEFAULT_MAX_TOKENS); - assert_eq!(default_max_tokens(None), DEFAULT_MAX_TOKENS); + assert_eq!(default_max_tokens(Some(Effort::Medium)), 16_000); + assert_eq!(default_max_tokens(Some(Effort::Low)), 16_000); + assert_eq!(default_max_tokens(None), 16_000); } // ── Config::load / prompt_cache_ttl ── diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 86e83cb..10dae02 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -61,7 +61,7 @@ Tier guide (from the [Opus 4.7 migration guide](https://platform.claude.com/docs #### `max_tokens` — response ceiling -When unset, oxide-code derives `max_tokens` from the resolved `effort` to match the claude-code reference: 64 000 for `xhigh` / `max`, 32 000 for `high`, 16 384 otherwise. Setting `max_tokens` explicitly (via TOML or `ANTHROPIC_MAX_TOKENS`) overrides the derivation. +When unset, oxide-code derives `max_tokens` from the resolved `effort`: 64 000 for `xhigh` / `max`, 32 000 for `high`, 16 000 otherwise. Setting `max_tokens` explicitly (via TOML or `ANTHROPIC_MAX_TOKENS`) overrides the derivation. #### `prompt_cache_ttl` — cache duration diff --git a/docs/research/anthropic-api.md b/docs/research/anthropic-api.md index 99ac050..54bb78d 100644 --- a/docs/research/anthropic-api.md +++ b/docs/research/anthropic-api.md @@ -232,7 +232,7 @@ GA as of Opus 4.6. Controls the intelligence-vs-latency tier of agentic turns vi - **The `effort-2025-11-24` beta header is necessary but not sufficient.** oxide-code used to send the header without the body field; the header became a no-op and the model ran at an undefined default. - **Per-model ceiling.** `max` is Opus-only; Sonnet 4.6 400s on it. `xhigh` is Opus 4.7-only. The `Capabilities::effort_max` / `effort_xhigh` flags encode this; `Capabilities::clamp_effort` clamps a user pick down to the highest supported level at or below it. - **Per-model default.** claude-code 2.1.119 sends `xhigh` on Opus 4.7, `high` on Opus 4.6 and Sonnet 4.6, omits the field entirely on earlier models. oxide-code mirrors this via `Capabilities::default_effort`. -- **`max_tokens` should scale with effort.** claude-code uses 64 K on Opus 4.7 at `xhigh`, 32 K on Sonnet 4.6 at `high`. oxide-code's `default_max_tokens(effort)` applies the same scaling when the user hasn't set `ANTHROPIC_MAX_TOKENS` explicitly. +- **`max_tokens` should scale with effort.** claude-code uses 64 K on Opus 4.7 at `xhigh`, 32 K on Sonnet 4.6 at `high`. oxide-code's `default_max_tokens(effort)` matches the upper tiers and uses 16 K otherwise when the user hasn't set `ANTHROPIC_MAX_TOKENS` explicitly. ### `context_management.edits` From b1ca79a81dd2f4515df783bfd764a673fa220c6d Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Fri, 24 Apr 2026 17:53:22 +0800 Subject: [PATCH 2/2] docs(research): note task-budgets-2026-03-13 beta in anthropic-api MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New Opus 4.7-exclusive beta that ships alongside `xhigh` / adaptive thinking. oxide-code doesn't implement it today — the beta is opt-in and paired with an `output_config.task_budget` body field. Document its existence in the additional-betas descriptor table so the research notes stay comprehensive for readers tracking the 4.7 surface. --- docs/research/anthropic-api.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/research/anthropic-api.md b/docs/research/anthropic-api.md index 54bb78d..65431ce 100644 --- a/docs/research/anthropic-api.md +++ b/docs/research/anthropic-api.md @@ -49,15 +49,16 @@ anthropic-beta: claude-code-20250219,oauth-2025-04-20 Additional useful betas: -| Header | Purpose | -| --------------------------------- | -------------------------------------------------- | -| `interleaved-thinking-2025-05-14` | Extended thinking support | -| `context-1m-2025-08-07` | 1M context window | -| `context-management-2025-06-27` | Context management | -| `prompt-caching-scope-2026-01-05` | Prompt caching | -| `effort-2025-11-24` | Effort control | -| `structured-outputs-2025-12-15` | JSON-schema-constrained responses (one-shot calls) | -| `advanced-tool-use-2025-11-20` | Tool search (first-party only) | +| Header | Purpose | +| --------------------------------- | ------------------------------------------------------------ | +| `interleaved-thinking-2025-05-14` | Extended thinking support | +| `context-1m-2025-08-07` | 1M context window | +| `context-management-2025-06-27` | Context management | +| `prompt-caching-scope-2026-01-05` | Prompt caching | +| `effort-2025-11-24` | Effort control | +| `structured-outputs-2025-12-15` | JSON-schema-constrained responses (one-shot calls) | +| `advanced-tool-use-2025-11-20` | Tool search (first-party only) | +| `task-budgets-2026-03-13` | Advisory token budget across an agentic loop (Opus 4.7 only) | #### Per-model beta sets