hakula139 · hakula139 · Apr 24, 2026 · Apr 24, 2026 · Apr 24, 2026
diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs
@@ -18,7 +18,6 @@ use crate::util::env;
 
 const DEFAULT_MODEL: &str = "claude-opus-4-7";
 const DEFAULT_BASE_URL: &str = "https://api.anthropic.com";
-const DEFAULT_MAX_TOKENS: u32 = 16384;
 
 #[derive(Debug, Clone)]
 pub enum Auth {
@@ -243,15 +242,13 @@ impl Config {
     }
 }
 
-/// Per-effort `max_tokens` default. Matches claude-code 2.1.119's
-/// observed values: 64 K for the top two tiers (xhigh / max), 32 K
-/// for high, the legacy 16 384 for everything else. Users override
-/// via `ANTHROPIC_MAX_TOKENS` / `[client].max_tokens`.
+/// Per-effort `max_tokens` default; overridden by
+/// `ANTHROPIC_MAX_TOKENS` / `[client].max_tokens`.
 fn default_max_tokens(effort: Option<Effort>) -> u32 {
     match effort {
         Some(Effort::Xhigh | Effort::Max) => 64_000,
         Some(Effort::High) => 32_000,
-        _ => DEFAULT_MAX_TOKENS,
+        _ => 16_000,
     }
 }
 
@@ -416,10 +413,8 @@ mod tests {
 
     #[tokio::test]
     async fn load_defaults_apply_when_no_config_and_no_env() {
-        // Default model (Opus 4.7) supports `xhigh`, so both `effort`
-        // and `max_tokens` derive from that ceiling — matches the
-        // claude-code 2.1.119 packet capture. Prompt cache defaults
-        // to 1h (opt-out via `OX_PROMPT_CACHE_TTL=5m`).
+        // Opus 4.7 supports `xhigh`, so both `effort` and `max_tokens`
+        // derive from that ceiling. Prompt cache defaults to 1h.
         let dir = tempfile::tempdir().unwrap();
         let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load())
             .await
@@ -713,9 +708,9 @@ mod tests {
         assert_eq!(default_max_tokens(Some(Effort::Max)), 64_000);
         assert_eq!(default_max_tokens(Some(Effort::Xhigh)), 64_000);
         assert_eq!(default_max_tokens(Some(Effort::High)), 32_000);
-        assert_eq!(default_max_tokens(Some(Effort::Medium)), DEFAULT_MAX_TOKENS);
-        assert_eq!(default_max_tokens(Some(Effort::Low)), DEFAULT_MAX_TOKENS);
-        assert_eq!(default_max_tokens(None), DEFAULT_MAX_TOKENS);
+        assert_eq!(default_max_tokens(Some(Effort::Medium)), 16_000);
+        assert_eq!(default_max_tokens(Some(Effort::Low)), 16_000);
+        assert_eq!(default_max_tokens(None), 16_000);
     }
 
     // ── Config::load / prompt_cache_ttl ──

diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md
@@ -61,7 +61,7 @@ Tier guide (from the [Opus 4.7 migration guide](https://platform.claude.com/docs
 
 #### `max_tokens` — response ceiling
 
-When unset, oxide-code derives `max_tokens` from the resolved `effort` to match the claude-code reference: 64 000 for `xhigh` / `max`, 32 000 for `high`, 16 384 otherwise. Setting `max_tokens` explicitly (via TOML or `ANTHROPIC_MAX_TOKENS`) overrides the derivation.
+When unset, oxide-code derives `max_tokens` from the resolved `effort`: 64 000 for `xhigh` / `max`, 32 000 for `high`, 16 000 otherwise. Setting `max_tokens` explicitly (via TOML or `ANTHROPIC_MAX_TOKENS`) overrides the derivation.
 
 #### `prompt_cache_ttl` — cache duration
 

diff --git a/docs/research/anthropic-api.md b/docs/research/anthropic-api.md
@@ -49,15 +49,16 @@ anthropic-beta: claude-code-20250219,oauth-2025-04-20
 
 Additional useful betas:
 
-| Header                            | Purpose                                            |
-| --------------------------------- | -------------------------------------------------- |
-| `interleaved-thinking-2025-05-14` | Extended thinking support                          |
-| `context-1m-2025-08-07`           | 1M context window                                  |
-| `context-management-2025-06-27`   | Context management                                 |
-| `prompt-caching-scope-2026-01-05` | Prompt caching                                     |
-| `effort-2025-11-24`               | Effort control                                     |
-| `structured-outputs-2025-12-15`   | JSON-schema-constrained responses (one-shot calls) |
-| `advanced-tool-use-2025-11-20`    | Tool search (first-party only)                     |
+| Header                            | Purpose                                                      |
+| --------------------------------- | ------------------------------------------------------------ |
+| `interleaved-thinking-2025-05-14` | Extended thinking support                                    |
+| `context-1m-2025-08-07`           | 1M context window                                            |
+| `context-management-2025-06-27`   | Context management                                           |
+| `prompt-caching-scope-2026-01-05` | Prompt caching                                               |
+| `effort-2025-11-24`               | Effort control                                               |
+| `structured-outputs-2025-12-15`   | JSON-schema-constrained responses (one-shot calls)           |
+| `advanced-tool-use-2025-11-20`    | Tool search (first-party only)                               |
+| `task-budgets-2026-03-13`         | Advisory token budget across an agentic loop (Opus 4.7 only) |
 
 #### Per-model beta sets
 
@@ -232,7 +233,7 @@ GA as of Opus 4.6. Controls the intelligence-vs-latency tier of agentic turns vi
 - **The `effort-2025-11-24` beta header is necessary but not sufficient.** oxide-code used to send the header without the body field; the header became a no-op and the model ran at an undefined default.
 - **Per-model ceiling.** `max` is Opus-only; Sonnet 4.6 400s on it. `xhigh` is Opus 4.7-only. The `Capabilities::effort_max` / `effort_xhigh` flags encode this; `Capabilities::clamp_effort` clamps a user pick down to the highest supported level at or below it.
 - **Per-model default.** claude-code 2.1.119 sends `xhigh` on Opus 4.7, `high` on Opus 4.6 and Sonnet 4.6, omits the field entirely on earlier models. oxide-code mirrors this via `Capabilities::default_effort`.
-- **`max_tokens` should scale with effort.** claude-code uses 64 K on Opus 4.7 at `xhigh`, 32 K on Sonnet 4.6 at `high`. oxide-code's `default_max_tokens(effort)` applies the same scaling when the user hasn't set `ANTHROPIC_MAX_TOKENS` explicitly.
+- **`max_tokens` should scale with effort.** claude-code uses 64 K on Opus 4.7 at `xhigh`, 32 K on Sonnet 4.6 at `high`. When the user hasn't set `ANTHROPIC_MAX_TOKENS` explicitly, oxide-code's `default_max_tokens(effort)` matches those upper tiers and falls back to 16 K for every other effort level.
 
 ### `context_management.edits`