diff --git a/.cspell/words.txt b/.cspell/words.txt index e95fc06..f9f20fe 100644 --- a/.cspell/words.txt +++ b/.cspell/words.txt @@ -61,5 +61,6 @@ unresumable urandom venv vtable +xhigh xxhash yazi diff --git a/crates/oxide-code/src/client/anthropic.rs b/crates/oxide-code/src/client/anthropic.rs index cc70362..1e20c99 100644 --- a/crates/oxide-code/src/client/anthropic.rs +++ b/crates/oxide-code/src/client/anthropic.rs @@ -24,7 +24,7 @@ use tracing::debug; use uuid::Uuid; use super::billing; -use crate::config::{Auth, Config, ThinkingConfig}; +use crate::config::{Auth, Config, Effort, PromptCacheTtl, ThinkingConfig}; use crate::message::{ContentBlock, Message, Role}; use crate::prompt::SYSTEM_PROMPT_DYNAMIC_BOUNDARY; use crate::tool::ToolDefinition; @@ -39,8 +39,10 @@ const OAUTH_BETA_HEADER: &str = "oauth-2025-04-20"; const PROMPT_CACHING_SCOPE_BETA_HEADER: &str = "prompt-caching-scope-2026-01-05"; const STRUCTURED_OUTPUTS_BETA_HEADER: &str = "structured-outputs-2025-12-15"; -/// Matches the installed Claude Code version. -const CLAUDE_CLI_VERSION: &str = "2.1.101"; +/// Matches the installed Claude Code version. The rest of this PR is +/// pinned against 2.1.119 packet captures; keep the wire +/// `User-Agent` / `cc_version` claim aligned. +const CLAUDE_CLI_VERSION: &str = "2.1.119"; /// OAuth-required identity prefix. The Anthropic API returns 429 for non-Haiku /// models with OAuth tokens unless the system prompt starts with this exact @@ -64,20 +66,66 @@ struct CreateMessageRequest<'a> { tools: Option<&'a [ToolDefinition]>, #[serde(skip_serializing_if = "Option::is_none")] thinking: Option<&'a ThinkingConfig>, - /// JSON-schema-constrained output format for one-shot utility calls - /// (title generation, future classifiers). Must travel alongside the - /// `structured-outputs-2025-12-15` beta header; both are gated on - /// `Capabilities::structured_outputs` so unsupported models silently - /// drop back to free-form text rather than 400ing the gateway. 
+ /// Carries both the `format` (JSON-schema-constrained output for + /// one-shot calls) and `effort` (agentic-path intelligence tier) + /// knobs. Wrapped in `Option` so an empty `OutputConfig` never + /// ships — callers build one via [`OutputConfig::new`] and pass + /// `None` when neither sub-field is set. #[serde(skip_serializing_if = "Option::is_none")] output_config: Option>, + /// `context_management.edits` — the client-side context-editing + /// directive that partners the `context-management-2025-06-27` + /// beta header. Populated on the streaming path for any model + /// with [`Capabilities::context_management`] set. + #[serde(skip_serializing_if = "Option::is_none")] + context_management: Option, messages: &'a [Message], } -/// Wrapper matching the wire shape `output_config.format = {...}`. +/// Shared wrapper for the `output_config` body field. Either field +/// may be absent; when both are, [`Self::new`] returns `None` so the +/// builder never ships an empty object. #[derive(Serialize)] struct OutputConfig<'a> { - format: &'a OutputFormat, + #[serde(skip_serializing_if = "Option::is_none")] + format: Option<&'a OutputFormat>, + #[serde(skip_serializing_if = "Option::is_none")] + effort: Option, +} + +impl<'a> OutputConfig<'a> { + /// Returns `None` when every field is empty so callers can avoid + /// shipping a bare `{}`. `Some(_)` otherwise. + fn new(format: Option<&'a OutputFormat>, effort: Option) -> Option { + (format.is_some() || effort.is_some()).then_some(Self { format, effort }) + } +} + +/// `context_management.edits` body field. oxide-code mirrors +/// claude-code 2.1.119's observed wire shape — a single +/// `clear_thinking_20251015` edit with `keep = "all"` on every +/// agentic request that also ships the matching beta header. +#[derive(Serialize)] +struct ContextManagement { + edits: [ContextEdit; 1], +} + +impl ContextManagement { + /// Wire shape claude-code 2.1.119 sends on every 4.6+ request. 
+ /// Single place to edit when Anthropic ships newer edit types or + /// we need to diverge from the default. + fn clear_thinking_keep_all() -> Self { + Self { + edits: [ContextEdit::ClearThinking20251015 { keep: "all" }], + } + } +} + +#[derive(Serialize)] +#[serde(tag = "type")] +enum ContextEdit { + #[serde(rename = "clear_thinking_20251015")] + ClearThinking20251015 { keep: &'static str }, } /// JSON-schema-constrained completion format. Constructed via @@ -128,6 +176,8 @@ struct SystemBlock<'a> { /// Prompt caching control. The `scope` field determines the cache sharing /// level: `"global"` for static content identical across sessions (1P only), /// `None` for the default org-scoped ephemeral cache (universally accepted). +/// The `ttl` field overrides the server default (5 m as of 2026-03) — +/// oxide-code defaults to `"1h"`, opt-out via `prompt_cache_ttl = "5m"`. /// /// `scope: "global"` must be a true prefix of all preceding request content /// — the server rejects a global-scoped block preceded by a non-global @@ -138,6 +188,8 @@ struct CacheControl { r#type: &'static str, #[serde(skip_serializing_if = "Option::is_none")] scope: Option<&'static str>, + #[serde(skip_serializing_if = "Option::is_none")] + ttl: Option<&'static str>, } // ── SSE response types ── @@ -438,7 +490,10 @@ impl Client { system_blocks.push(SystemBlock { r#type: "text", text: &static_joined, - cache_control: Some(static_prefix_cache_control(is_first_party)), + cache_control: Some(static_prefix_cache_control( + is_first_party, + self.config.prompt_cache_ttl, + )), }); } if !dynamic_joined.is_empty() { @@ -449,6 +504,8 @@ impl Client { }); } + let caps = crate::model::capabilities_for(&self.config.model); + let url = format!("{}/v1/messages?beta=true", self.config.base_url); let mut body = serde_json::to_string(&CreateMessageRequest { // `[1m]` is a client-side tag; strip before the wire. 
@@ -459,7 +516,14 @@ impl Client { system: system_blocks, tools: (!tools.is_empty()).then_some(tools), thinking: self.config.thinking.as_ref(), - output_config: None, + output_config: OutputConfig::new(None, self.config.effort), + // Gated on the same capability flag as the + // `context-management-2025-06-27` beta header so body and + // header stay in sync — claude-code 2.1.119 ships them + // together on every 4.6+ agentic request. + context_management: caps + .context_management + .then(ContextManagement::clear_thinking_keep_all), messages: effective_messages, }) .context("failed to serialize request")?; @@ -587,9 +651,7 @@ fn compute_betas( want_structured: bool, is_first_party: bool, ) -> Vec<&'static str> { - let caps = crate::model::lookup(model) - .map(|info| info.capabilities) - .unwrap_or_default(); + let caps = crate::model::capabilities_for(model); let is_haiku = model.to_lowercase().contains("haiku"); // Order mirrors `docs/research/anthropic-api.md` → Per-model beta @@ -642,7 +704,7 @@ fn compute_betas( /// Whether the target model accepts the `structured-outputs-2025-12-15` /// beta. Thin wrapper over the capability table for pre-checks. pub(crate) fn supports_structured_outputs(model: &str) -> bool { - crate::model::lookup(model).is_some_and(|info| info.capabilities.structured_outputs) + crate::model::capabilities_for(model).structured_outputs } /// Whether `base_url` points at the first-party Anthropic API, gating @@ -669,11 +731,13 @@ fn is_first_party_base_url(base_url: &str) -> bool { /// global scope so the prefix is shared across sessions; on 3P, fall /// back to the default (org-scoped) ephemeral cache — 3P gateways /// reject `scope: "global"` because tool definitions render first and -/// taint the cache prefix. -fn static_prefix_cache_control(is_first_party: bool) -> CacheControl { +/// taint the cache prefix. `ttl` overrides the server default (5 m) +/// when set via `config.prompt_cache_ttl`. 
+fn static_prefix_cache_control(is_first_party: bool, ttl: PromptCacheTtl) -> CacheControl { CacheControl { r#type: "ephemeral", scope: is_first_party.then_some("global"), + ttl: ttl.wire(), } } @@ -754,7 +818,10 @@ fn build_completion_body( system: system_blocks, tools: None, thinking: None, - output_config: output_format.map(|format| OutputConfig { format }), + output_config: OutputConfig::new(output_format, None), + // One-shot completions never opt into context management — + // matches claude-code's one-shot path. + context_management: None, messages: &messages, }) .context("failed to serialize request")?; @@ -966,9 +1033,11 @@ fn parse_sse_frame(frame: &str) -> Result> { pub(crate) fn test_config(base_url: impl Into, auth: Auth, model: &str) -> Config { Config { auth, - model: model.to_owned(), base_url: base_url.into(), + model: model.to_owned(), + effort: None, max_tokens: 128, + prompt_cache_ttl: PromptCacheTtl::OneHour, thinking: None, show_thinking: false, } @@ -1021,13 +1090,17 @@ mod tests { Auth::OAuth("t".to_owned()) } - /// Concatenates SSE frames into a valid response body, each - /// followed by the required `\n\n` terminator. - fn sse_body(frames: &[&str]) -> String { + /// Builds an SSE response body from `(event, data)` pairs. Each + /// frame is emitted as `event: \ndata: \n\n`, encoding + /// the frame-separator invariant in one place so call sites don't + /// hand-roll it (and can't silently omit the `\n\n`). + fn sse_body(frames: &[(&str, &str)]) -> String { + use std::fmt::Write; let mut body = String::new(); - for f in frames { - body.push_str(f); - body.push_str("\n\n"); + for (event, data) in frames { + writeln!(body, "event: {event}").unwrap(); + writeln!(body, "data: {data}").unwrap(); + body.push('\n'); } body } @@ -1045,18 +1118,27 @@ mod tests { /// Well-formed SSE body for a short text response. 
fn text_stream_body() -> String { sse_body(&[ - r#"event: message_start -data: {"type":"message_start","message":{"id":"msg_1","model":"claude-sonnet-4-6","usage":{"input_tokens":5,"output_tokens":0}}}"#, - r#"event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, - r#"event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}"#, - r#"event: content_block_stop -data: {"type":"content_block_stop","index":0}"#, - r#"event: message_delta -data: {"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":1}}"#, - r#"event: message_stop -data: {"type":"message_stop"}"#, + ( + "message_start", + r#"{"type":"message_start","message":{"id":"msg_1","model":"claude-sonnet-4-6","usage":{"input_tokens":5,"output_tokens":0}}}"#, + ), + ( + "content_block_start", + r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, + ), + ( + "content_block_delta", + r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}"#, + ), + ( + "content_block_stop", + r#"{"type":"content_block_stop","index":0}"#, + ), + ( + "message_delta", + r#"{"type":"message_delta","delta":{"stop_reason":"end_turn"},"usage":{"output_tokens":1}}"#, + ), + ("message_stop", r#"{"type":"message_stop"}"#), ]) } @@ -1369,12 +1451,15 @@ data: {"type":"message_stop"}"#, // would mangle a 4-byte emoji split across TCP chunk boundaries. 
let server = MockServer::start().await; let body = sse_body(&[ - r#"event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, - r#"event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"🦀rust"}}"#, - r#"event: message_stop -data: {"type":"message_stop"}"#, + ( + "content_block_start", + r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, + ), + ( + "content_block_delta", + r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"🦀rust"}}"#, + ), + ("message_stop", r#"{"type":"message_stop"}"#), ]); Mock::given(method("POST")) .and(path("/v1/messages")) @@ -1414,14 +1499,16 @@ data: {"type":"message_stop"}"#, // one bad frame cannot poison the whole turn. let server = MockServer::start().await; let body = sse_body(&[ - r#"event: content_block_start -data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, - r"event: content_block_delta -data: {not valid json", - r#"event: content_block_delta -data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}"#, - r#"event: message_stop -data: {"type":"message_stop"}"#, + ( + "content_block_start", + r#"{"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}"#, + ), + ("content_block_delta", "{not valid json"), + ( + "content_block_delta", + r#"{"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hi"}}"#, + ), + ("message_stop", r#"{"type":"message_stop"}"#), ]); Mock::given(method("POST")) .and(path("/v1/messages")) @@ -1460,8 +1547,10 @@ data: {"type":"message_stop"}"#, // `StreamEvent::Error` flows as `Ok(Error { .. })` on the channel; // the caller (`agent.rs`) converts it to a bail!. 
let server = MockServer::start().await; - let body = sse_body(&[r#"event: error -data: {"type":"error","error":{"type":"overloaded_error","message":"Servers overloaded"}}"#]); + let body = sse_body(&[( + "error", + r#"{"type":"error","error":{"type":"overloaded_error","message":"Servers overloaded"}}"#, + )]); Mock::given(method("POST")) .and(path("/v1/messages")) .respond_with( @@ -1548,7 +1637,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over let mut rx = client .stream_message(&[Message::user("hi")], &[], None, &[]) .unwrap(); - let _ = rx.recv().await; + _ = rx.recv().await; drop(rx); // Lets the background task observe the closed channel and exit; // any panic would surface in test output. @@ -1776,6 +1865,128 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over cc.get("scope").is_none(), "scope field omitted entirely on 3P (not null): {body}", ); + // TTL rides through on 3P — only `scope` is gated on 1P. + assert_eq!(cc["ttl"], "1h", "default 1h ttl survives on 3P: {body}"); + } + + // ── Client::stream_message / agentic body fields ── + + /// Captures the serialized body of a single streaming request. + /// Most agentic-body tests only care about what oxide-code sends, + /// not the response — this collapses the ceremony to two lines + /// per test. 
+ async fn capture_stream_body(config: Config) -> serde_json::Value { + let server = MockServer::start().await; + let sink: Captured = captured(); + let sink_clone = std::sync::Arc::clone(&sink); + Mock::given(method("POST")) + .and(path("/v1/messages")) + .respond_with(move |req: &Request| { + *sink_clone.lock().unwrap() = Some(String::from_utf8_lossy(&req.body).into_owned()); + ResponseTemplate::new(200) + .insert_header("content-type", "text/event-stream") + .set_body_string(text_stream_body()) + }) + .mount(&server) + .await; + + let mut cfg = config; + cfg.base_url = server.uri(); + let client = Client::new(cfg, Some("sid".to_owned())).unwrap(); + collect_events( + client + .stream_message(&[Message::user("hi")], &[], None, &[]) + .unwrap(), + ) + .await + .unwrap(); + let body = sink.lock().unwrap().clone().expect("request captured"); + serde_json::from_str(&body).unwrap() + } + + #[tokio::test] + async fn stream_message_opus_4_7_emits_output_config_effort_xhigh() { + let mut cfg = test_config("https://placeholder.invalid", api_key(), "claude-opus-4-7"); + cfg.effort = Some(Effort::Xhigh); + let body = capture_stream_body(cfg).await; + assert_eq!(body["output_config"]["effort"], "xhigh"); + } + + #[tokio::test] + async fn stream_message_omits_output_config_when_effort_is_none() { + // Non-effort-capable model → `Config.effort == None` → the + // whole `output_config` block is absent (not `{}`). + let cfg = test_config( + "https://placeholder.invalid", + api_key(), + "claude-sonnet-4-5", + ); + assert!(cfg.effort.is_none(), "precondition: effort unset"); + let body = capture_stream_body(cfg).await; + assert!( + body.get("output_config").is_none(), + "output_config absent: {body}", + ); + } + + #[tokio::test] + async fn stream_message_context_management_body_present_on_4_6_plus() { + // Every model whose `context_management` capability flag is + // set must also ship the body directive alongside the beta + // header. 
+ for model in [ + "claude-opus-4-7", + "claude-opus-4-6", + "claude-sonnet-4-6", + "claude-haiku-4-5", + ] { + let cfg = test_config("https://placeholder.invalid", api_key(), model); + let body = capture_stream_body(cfg).await; + let edits = body["context_management"]["edits"] + .as_array() + .unwrap_or_else(|| panic!("context_management.edits missing for {model}: {body}")); + assert_eq!(edits.len(), 1, "{model}"); + assert_eq!(edits[0]["type"], "clear_thinking_20251015", "{model}"); + assert_eq!(edits[0]["keep"], "all", "{model}"); + } + } + + #[tokio::test] + async fn stream_message_context_management_absent_on_unknown_model() { + // Unknown model ids (no `MODELS` row matches) fall back to + // the all-false `Capabilities::default()` — no beta, no body + // directive. Keeps "beta sent ⇒ body populated" an invariant. + let cfg = test_config("https://placeholder.invalid", api_key(), "claude-opus-5-0"); + let body = capture_stream_body(cfg).await; + assert!( + body.get("context_management").is_none(), + "context_management absent on unknown models: {body}", + ); + } + + #[tokio::test] + async fn stream_message_show_thinking_emits_display_summarized() { + let mut cfg = test_config("https://placeholder.invalid", api_key(), "claude-opus-4-7"); + cfg.thinking = Some(ThinkingConfig::Adaptive { + display: Some(crate::config::ThinkingDisplay::Summarized), + }); + let body = capture_stream_body(cfg).await; + assert_eq!(body["thinking"]["type"], "adaptive"); + assert_eq!(body["thinking"]["display"], "summarized"); + } + + #[tokio::test] + async fn stream_message_show_thinking_false_omits_display_field() { + // `Adaptive { display: None }` must serialize without a + // `display` key — `skip_serializing_if` on the wire. 
+ let mut cfg = test_config("https://placeholder.invalid", api_key(), "claude-opus-4-7"); + cfg.thinking = Some(ThinkingConfig::Adaptive { display: None }); + let body = capture_stream_body(cfg).await; + assert_eq!(body["thinking"]["type"], "adaptive"); + assert!( + body["thinking"].get("display").is_none(), + "display field absent: {body}", + ); } // ── Client::complete ── @@ -1864,7 +2075,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over Some("sid".to_owned()), ) .unwrap(); - let _ = client + _ = client .complete(model, "sys", "prompt", 40, Some(&fmt)) .await .unwrap(); @@ -1916,7 +2127,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over Some("sid".to_owned()), ) .unwrap(); - let _ = client + _ = client .complete("claude-haiku-4-5", "", "hi", 40, None) .await .unwrap(); @@ -1934,6 +2145,41 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over assert!(!body.contains("cch=00000"), "cch populated: {body}"); } + #[tokio::test] + async fn complete_does_not_emit_context_management_edits() { + // `context_management.edits` is an agentic-path directive; it + // must stay off the one-shot `complete` path even on models + // that carry the capability flag (Haiku 4.5 here). 
+ let server = MockServer::start().await; + let sink: Captured = captured(); + let sink_clone = std::sync::Arc::clone(&sink); + Mock::given(method("POST")) + .and(path("/v1/messages")) + .respond_with(move |req: &Request| { + *sink_clone.lock().unwrap() = Some(String::from_utf8_lossy(&req.body).into_owned()); + ResponseTemplate::new(200).set_body_string(completion_body("ok")) + }) + .mount(&server) + .await; + + let client = Client::new( + test_config(server.uri(), api_key(), "claude-haiku-4-5"), + Some("sid".to_owned()), + ) + .unwrap(); + _ = client + .complete("claude-haiku-4-5", "sys", "hi", 40, None) + .await + .unwrap(); + + let body = sink.lock().unwrap().clone().expect("body captured"); + let v: serde_json::Value = serde_json::from_str(&body).unwrap(); + assert!( + v.get("context_management").is_none(), + "context_management absent on one-shot path: {body}", + ); + } + // ── build_metadata ── #[test] @@ -1949,7 +2195,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over // ── compute_betas ── #[test] - fn compute_betas_agentic_opus_46_plain_carries_full_set_except_1m() { + fn compute_betas_agentic_opus_4_6_plain_carries_full_set_except_1m() { // Plain model (no `[1m]` tag) must not auto-enable 1M context — // a gateway without 1M access would 400. 
let betas = compute_betas("claude-opus-4-6", &api_key(), true, false, true); @@ -1964,7 +2210,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over } #[test] - fn compute_betas_opus_46_with_1m_tag_adds_context_1m() { + fn compute_betas_opus_4_6_with_1m_tag_adds_context_1m() { let betas = compute_betas("claude-opus-4-6[1m]", &api_key(), true, false, true); assert!(betas.contains(&CONTEXT_1M_BETA_HEADER)); assert!(betas.contains(&EFFORT_BETA_HEADER)); @@ -1977,7 +2223,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over } #[test] - fn compute_betas_sonnet_45_has_thinking_but_not_effort() { + fn compute_betas_sonnet_4_5_has_thinking_but_not_effort() { // Sonnet 4.5 supports interleaved thinking but not effort; // plain (no `[1m]` tag) means no 1M beta either. let betas = compute_betas("claude-sonnet-4-5", &api_key(), true, false, true); @@ -2030,7 +2276,7 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over } #[test] - fn compute_betas_opus_47_matches_opus_46_family() { + fn compute_betas_opus_4_7_matches_opus_4_6_family() { let plain = compute_betas("claude-opus-4-7", &api_key(), true, false, true); assert!(plain.contains(&INTERLEAVED_THINKING_BETA_HEADER)); assert!(plain.contains(&CONTEXT_MANAGEMENT_BETA_HEADER)); @@ -2105,22 +2351,30 @@ data: {"type":"error","error":{"type":"overloaded_error","message":"Servers over #[test] fn static_prefix_cache_control_emits_global_scope_on_first_party_only() { - // 1P → `{"type":"ephemeral","scope":"global"}` — global cache. - // 3P → `{"type":"ephemeral"}` — default (org) scope; every - // gateway accepts this. 
- let first = static_prefix_cache_control(true); + let first = static_prefix_cache_control(true, PromptCacheTtl::OneHour); assert_eq!(first.r#type, "ephemeral"); assert_eq!(first.scope, Some("global")); - let third = static_prefix_cache_control(false); + let third = static_prefix_cache_control(false, PromptCacheTtl::OneHour); assert_eq!(third.r#type, "ephemeral"); assert_eq!(third.scope, None); + } - // Round-trip through JSON to pin the on-wire shape — the - // `scope` key must be absent (not `null`) in the 3P case so - // gateways that validate the field strictly accept it. - let wire = serde_json::to_string(&third).unwrap(); - assert_eq!(wire, r#"{"type":"ephemeral"}"#); + #[test] + fn static_prefix_cache_control_ttl_matches_config() { + // 1h → `ttl: "1h"` in the wire. 5m → field absent entirely + // (matches server default; keeps the pre-2026-03 wire shape). + let one_hour = static_prefix_cache_control(false, PromptCacheTtl::OneHour); + assert_eq!( + serde_json::to_string(&one_hour).unwrap(), + r#"{"type":"ephemeral","ttl":"1h"}"#, + ); + + let five_min = static_prefix_cache_control(false, PromptCacheTtl::FiveMin); + assert_eq!( + serde_json::to_string(&five_min).unwrap(), + r#"{"type":"ephemeral"}"#, + ); } // ── api_model_id / has_1m_tag ── diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index d495f92..986d4bc 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -8,8 +8,11 @@ mod file; mod oauth; -use anyhow::{Context, Result}; -use serde::Serialize; +use std::fmt; +use std::str::FromStr; + +use anyhow::{Context, Result, bail}; +use serde::{Deserialize, Serialize}; use crate::util::env; @@ -28,16 +31,134 @@ pub enum Auth { #[derive(Debug, Clone, Serialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ThinkingConfig { - /// Model decides the thinking budget (Claude 4.6+). - Adaptive, + /// Model decides the thinking budget (Claude 4.6+). 
`display` + /// controls what the API streams back: `Omitted` (4.7 default, + /// empty `thinking` field) or `Summarized` (the 4.6 default, and + /// what oxide-code enables whenever `show_thinking=true`). + Adaptive { + #[serde(skip_serializing_if = "Option::is_none")] + display: Option, + }, +} + +/// `thinking.display` values accepted by the API on 4.7+. Only +/// `Summarized` is ever emitted — omitting the field entirely (via +/// `display: None`) already yields the `omitted` default on 4.7. +#[derive(Debug, Clone, Copy, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ThinkingDisplay { + Summarized, +} + +/// Intelligence-vs-latency tier sent as `output_config.effort` on +/// effort-capable models. The per-model ceiling lives in +/// [`crate::model::Capabilities`]. +#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Effort { + Low, + Medium, + High, + Xhigh, + Max, +} + +impl Effort { + const fn as_str(self) -> &'static str { + match self { + Self::Low => "low", + Self::Medium => "medium", + Self::High => "high", + Self::Xhigh => "xhigh", + Self::Max => "max", + } + } +} + +impl fmt::Display for Effort { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +impl FromStr for Effort { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "low" => Ok(Self::Low), + "medium" => Ok(Self::Medium), + "high" => Ok(Self::High), + "xhigh" => Ok(Self::Xhigh), + "max" => Ok(Self::Max), + _ => bail!("invalid effort {s:?}; expected one of: low, medium, high, xhigh, max"), + } + } +} + +/// Prompt-cache TTL sent as `cache_control.ttl`. Anthropic silently +/// dropped the default from 1h to 5m on 2026-03-06, so `OneHour` is +/// explicit opt-in. oxide-code defaults to `OneHour`. 
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] +pub enum PromptCacheTtl { + #[serde(rename = "5m")] + FiveMin, + #[serde(rename = "1h")] + OneHour, +} + +impl PromptCacheTtl { + /// Wire value for `cache_control.ttl`. `None` when the TTL is + /// the server default (5 m) so the JSON omits the field entirely. + pub(crate) const fn wire(self) -> Option<&'static str> { + match self { + Self::FiveMin => None, + Self::OneHour => Some("1h"), + } + } + + const fn as_str(self) -> &'static str { + match self { + Self::FiveMin => "5m", + Self::OneHour => "1h", + } + } +} + +impl fmt::Display for PromptCacheTtl { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + +impl FromStr for PromptCacheTtl { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "5m" => Ok(Self::FiveMin), + "1h" => Ok(Self::OneHour), + _ => bail!("invalid prompt_cache_ttl {s:?}; expected one of: 5m, 1h"), + } + } } +/// Resolved configuration. #[derive(Debug, Clone)] pub struct Config { pub auth: Auth, - pub model: String, pub base_url: String, + pub model: String, + /// `output_config.effort` for the streaming path. `None` means + /// the model doesn't accept the parameter and the field is + /// omitted. Resolved once at [`Config::load`] — callers forward. + pub effort: Option, pub max_tokens: u32, + /// `cache_control.ttl` for every cacheable block. Default is + /// [`PromptCacheTtl::OneHour`] since Anthropic's 2026-03 TTL + /// drop made the server default (5 m) a silent cost regression + /// on long sessions. 
+ pub prompt_cache_ttl: PromptCacheTtl, pub thinking: Option, pub show_thinking: bool, } @@ -74,29 +195,66 @@ impl Config { .or(client.base_url) .unwrap_or_else(|| DEFAULT_BASE_URL.to_owned()); + let caps = crate::model::capabilities_for(&model); + + let effort_pick = match env::string("ANTHROPIC_EFFORT") { + Some(raw) => Some(raw.parse::().context("ANTHROPIC_EFFORT")?), + None => client.effort, + }; + let effort = match effort_pick { + Some(pick) => caps.clamp_effort(pick), + None => caps.default_effort(), + }; + let max_tokens = env::string("ANTHROPIC_MAX_TOKENS") .and_then(|v| v.parse().ok()) .or(client.max_tokens) - .unwrap_or(DEFAULT_MAX_TOKENS); - - // Adaptive thinking is always enabled — the model decides the budget. - let thinking = Some(ThinkingConfig::Adaptive); + .unwrap_or_else(|| default_max_tokens(effort)); let show_thinking = env::bool("OX_SHOW_THINKING") .or(tui.show_thinking) .unwrap_or(false); + // Adaptive thinking is always enabled — the model decides the + // budget. `display` opts 4.7 into streaming summarized thinking + // text (its default changed to `omitted` silently); 4.6 and + // older ignore the field. + let thinking = Some(ThinkingConfig::Adaptive { + display: show_thinking.then_some(ThinkingDisplay::Summarized), + }); + + let prompt_cache_ttl = match env::string("OX_PROMPT_CACHE_TTL") { + Some(raw) => raw + .parse::() + .context("OX_PROMPT_CACHE_TTL")?, + None => client.prompt_cache_ttl.unwrap_or(PromptCacheTtl::OneHour), + }; + Ok(Self { auth, - model, base_url, + model, + effort, max_tokens, + prompt_cache_ttl, thinking, show_thinking, }) } } +/// Per-effort `max_tokens` default. Matches claude-code 2.1.119's +/// observed values: 64 K for the top two tiers (xhigh / max), 32 K +/// for high, the legacy 16 384 for everything else. Users override +/// via `ANTHROPIC_MAX_TOKENS` / `[client].max_tokens`. 
+fn default_max_tokens(effort: Option<Effort>) -> u32 {
+    match effort {
+        Some(Effort::Xhigh | Effort::Max) => 64_000,
+        Some(Effort::High) => 32_000,
+        _ => DEFAULT_MAX_TOKENS,
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::collections::HashSet;
@@ -109,9 +267,89 @@ mod tests {
     // ── ThinkingConfig ──
 
     #[test]
-    fn thinking_config_adaptive_serializes() {
-        let json = serde_json::to_value(&ThinkingConfig::Adaptive).unwrap();
+    fn thinking_config_adaptive_without_display_serializes_bare() {
+        // Older models ignore `display`; absence keeps the wire as
+        // pre-4.7 clients expect.
+        let json = serde_json::to_value(&ThinkingConfig::Adaptive { display: None }).unwrap();
         assert_eq!(json["type"], "adaptive");
+        assert!(json.get("display").is_none(), "display omitted: {json}");
+    }
+
+    #[test]
+    fn thinking_config_adaptive_with_summarized_display_serializes() {
+        let json = serde_json::to_value(&ThinkingConfig::Adaptive {
+            display: Some(ThinkingDisplay::Summarized),
+        })
+        .unwrap();
+        assert_eq!(json["type"], "adaptive");
+        assert_eq!(json["display"], "summarized");
+    }
+
+    // ── Effort ──
+
+    #[test]
+    fn effort_round_trips_through_serde_and_fromstr() {
+        for (variant, token) in [
+            (Effort::Low, "low"),
+            (Effort::Medium, "medium"),
+            (Effort::High, "high"),
+            (Effort::Xhigh, "xhigh"),
+            (Effort::Max, "max"),
+        ] {
+            assert_eq!(serde_json::to_value(variant).unwrap(), token);
+            assert_eq!(variant.to_string(), token);
+            assert_eq!(token.parse::<Effort>().unwrap(), variant);
+        }
+    }
+
+    #[test]
+    fn effort_rejects_unknown_tokens_with_actionable_error() {
+        let err = "extra-high".parse::<Effort>().expect_err("unknown token");
toml::from_str(r#"effort = "xhigh""#).unwrap(); + assert_eq!(wrap.effort, Effort::Xhigh); + } + + // ── PromptCacheTtl ── + + #[test] + fn prompt_cache_ttl_wire_shape() { + // 5m is the server default → field omitted. 1h opts in → "1h". + assert_eq!(PromptCacheTtl::FiveMin.wire(), None); + assert_eq!(PromptCacheTtl::OneHour.wire(), Some("1h")); + } + + #[test] + fn prompt_cache_ttl_round_trips_through_serde_and_fromstr() { + for (variant, token) in [ + (PromptCacheTtl::FiveMin, "5m"), + (PromptCacheTtl::OneHour, "1h"), + ] { + assert_eq!(serde_json::to_value(variant).unwrap(), token); + assert_eq!(variant.to_string(), token); + assert_eq!(token.parse::().unwrap(), variant); + } + } + + #[test] + fn prompt_cache_ttl_rejects_unknown_tokens_with_actionable_error() { + let err = "30m".parse::().expect_err("unknown token"); + let msg = format!("{err:#}"); + assert!(msg.contains("30m"), "{msg}"); + assert!(msg.contains("5m"), "{msg}"); + assert!(msg.contains("1h"), "{msg}"); } // ── Config::load ── @@ -125,7 +363,9 @@ mod tests { "ANTHROPIC_MODEL", "ANTHROPIC_BASE_URL", "ANTHROPIC_MAX_TOKENS", + "ANTHROPIC_EFFORT", "OX_SHOW_THINKING", + "OX_PROMPT_CACHE_TTL", "XDG_CONFIG_HOME", ]; @@ -176,13 +416,19 @@ mod tests { #[tokio::test] async fn load_defaults_apply_when_no_config_and_no_env() { + // Default model (Opus 4.7) supports `xhigh`, so both `effort` + // and `max_tokens` derive from that ceiling — matches the + // claude-code 2.1.119 packet capture. Prompt cache defaults + // to 1h (opt-out via `OX_PROMPT_CACHE_TTL=5m`). 
let dir = tempfile::tempdir().unwrap(); let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await .unwrap(); assert_eq!(config.model, DEFAULT_MODEL); assert_eq!(config.base_url, DEFAULT_BASE_URL); - assert_eq!(config.max_tokens, DEFAULT_MAX_TOKENS); + assert_eq!(config.max_tokens, 64_000); + assert_eq!(config.effort, Some(Effort::Xhigh)); + assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::OneHour); assert!(!config.show_thinking); assert!(matches!(config.auth, Auth::ApiKey(k) if k == "sk-default")); } @@ -298,7 +544,10 @@ mod tests { let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await .unwrap(); - assert!(matches!(config.thinking, Some(ThinkingConfig::Adaptive))); + assert!(matches!( + config.thinking, + Some(ThinkingConfig::Adaptive { display: None }), + )); } #[tokio::test] @@ -345,4 +594,184 @@ mod tests { assert!(msg.contains("invalid config at"), "{msg}"); assert!(msg.contains("unknown field `show_thinking`"), "{msg}"); } + + // ── Config::load / effort resolution ── + + #[tokio::test] + async fn load_effort_default_follows_model_ceiling() { + for (model, expected) in [ + ("claude-opus-4-7", Some(Effort::Xhigh)), + ("claude-opus-4-6", Some(Effort::High)), + ("claude-sonnet-4-6", Some(Effort::High)), + ("claude-sonnet-4-5", None), + ("claude-haiku-4-5", None), + ] { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![xdg(&dir), env("ANTHROPIC_MODEL", model)]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, expected, "model={model}"); + } + } + + #[tokio::test] + async fn load_effort_env_overrides_per_model_default() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-opus-4-7"), + env("ANTHROPIC_EFFORT", "low"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, 
Some(Effort::Low)); + } + + #[tokio::test] + async fn load_effort_clamps_xhigh_down_to_high_on_sonnet_4_6() { + // Sonnet 4.6 supports `effort` but not `xhigh` / `max` — the + // user's pick must clamp rather than 400 the gateway. + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-sonnet-4-6"), + env("ANTHROPIC_EFFORT", "xhigh"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, Some(Effort::High)); + } + + #[tokio::test] + async fn load_effort_clamps_to_none_on_non_effort_capable_model() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-haiku-4-5"), + env("ANTHROPIC_EFFORT", "max"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, None); + } + + #[tokio::test] + async fn load_effort_file_picks_up_when_env_unset() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r#" + [client] + model = "claude-opus-4-7" + effort = "medium" + "#}, + ); + let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, Some(Effort::Medium)); + } + + #[tokio::test] + async fn load_effort_env_beats_file() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r#" + [client] + model = "claude-opus-4-7" + effort = "low" + "#}, + ); + let vars = env_vars(vec![xdg(&dir), env("ANTHROPIC_EFFORT", "max")]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.effort, Some(Effort::Max)); + } + + #[tokio::test] + async fn load_effort_invalid_env_surfaces_parse_error() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![xdg(&dir), env("ANTHROPIC_EFFORT", "insane")]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("invalid effort must propagate"); + let msg = format!("{err:#}"); + assert!(msg.contains("ANTHROPIC_EFFORT"), "{msg}"); + assert!(msg.contains("insane"), "{msg}"); + } + + // ── default_max_tokens ── + + #[test] + fn default_max_tokens_scales_with_effort() { + assert_eq!(default_max_tokens(Some(Effort::Max)), 64_000); + assert_eq!(default_max_tokens(Some(Effort::Xhigh)), 64_000); + assert_eq!(default_max_tokens(Some(Effort::High)), 32_000); + assert_eq!(default_max_tokens(Some(Effort::Medium)), DEFAULT_MAX_TOKENS); + assert_eq!(default_max_tokens(Some(Effort::Low)), DEFAULT_MAX_TOKENS); + assert_eq!(default_max_tokens(None), DEFAULT_MAX_TOKENS); + } + + // ── Config::load / prompt_cache_ttl ── + + #[tokio::test] + async fn load_prompt_cache_ttl_env_overrides_default() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![xdg(&dir), env("OX_PROMPT_CACHE_TTL", "5m")]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::FiveMin); + } + + #[tokio::test] + async fn load_prompt_cache_ttl_file_picks_up_when_env_unset() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r#"
+                [client]
+                prompt_cache_ttl = "5m"
+            "#},
+        );
+        let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load())
+            .await
+            .unwrap();
+        assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::FiveMin);
+    }
+
+    #[tokio::test]
+    async fn load_prompt_cache_ttl_env_beats_file() {
+        let dir = tempfile::tempdir().unwrap();
+        write_user_config(
+            dir.path(),
+            indoc::indoc! {r#"
+                [client]
+                prompt_cache_ttl = "5m"
+            "#},
+        );
+        let vars = env_vars(vec![xdg(&dir), env("OX_PROMPT_CACHE_TTL", "1h")]);
+        let config = temp_env::async_with_vars(vars, Config::load())
+            .await
+            .unwrap();
+        assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::OneHour);
+    }
+
+    #[tokio::test]
+    async fn load_prompt_cache_ttl_invalid_env_surfaces_parse_error() {
+        let dir = tempfile::tempdir().unwrap();
+        let vars = env_vars(vec![xdg(&dir), env("OX_PROMPT_CACHE_TTL", "forever")]);
+        let err = temp_env::async_with_vars(vars, Config::load())
+            .await
+            .expect_err("invalid ttl must propagate");
+        let msg = format!("{err:#}");
+        assert!(msg.contains("OX_PROMPT_CACHE_TTL"), "{msg}");
+        assert!(msg.contains("forever"), "{msg}");
+    }
 }
diff --git a/crates/oxide-code/src/config/file.rs b/crates/oxide-code/src/config/file.rs
index 233dd1b..bdc45b3 100644
--- a/crates/oxide-code/src/config/file.rs
+++ b/crates/oxide-code/src/config/file.rs
@@ -33,13 +33,19 @@ pub(super) struct FileConfig {
 }
 
 /// API client settings (`[client]` section).
+///
+/// Fields are grouped by concern so adjacent lines stay related:
+/// connection (`api_key`, `base_url`), model selection (`model`,
+/// `effort`), then request tuning (`max_tokens`, `prompt_cache_ttl`).
 #[derive(Debug, Default, Deserialize)]
 #[serde(deny_unknown_fields)]
 pub(super) struct ClientConfig {
     pub api_key: Option<String>,
-    pub model: Option<String>,
     pub base_url: Option<String>,
+    pub model: Option<String>,
+    pub effort: Option<Effort>,
     pub max_tokens: Option<u32>,
+    pub prompt_cache_ttl: Option<PromptCacheTtl>,
 }
 
 /// Terminal UI settings (`[tui]` section).
@@ -65,9 +71,11 @@ impl ClientConfig { fn merge(self, other: Self) -> Self { Self { api_key: other.api_key.or(self.api_key), - model: other.model.or(self.model), base_url: other.base_url.or(self.base_url), + model: other.model.or(self.model), + effort: other.effort.or(self.effort), max_tokens: other.max_tokens.or(self.max_tokens), + prompt_cache_ttl: other.prompt_cache_ttl.or(self.prompt_cache_ttl), } } } @@ -186,9 +194,11 @@ mod tests { let base = FileConfig { client: Some(ClientConfig { api_key: Some("base-key".to_owned()), - model: Some("base-model".to_owned()), base_url: Some("https://base.example.com".to_owned()), + model: Some("base-model".to_owned()), + effort: Some(super::super::Effort::Low), max_tokens: Some(1000), + prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), }), tui: Some(TuiConfig { show_thinking: Some(false), @@ -197,9 +207,11 @@ mod tests { let other = FileConfig { client: Some(ClientConfig { api_key: Some("other-key".to_owned()), - model: Some("other-model".to_owned()), base_url: Some("https://other.example.com".to_owned()), + model: Some("other-model".to_owned()), + effort: Some(super::super::Effort::Max), max_tokens: Some(2000), + prompt_cache_ttl: Some(super::super::PromptCacheTtl::OneHour), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -209,12 +221,17 @@ mod tests { let client = merged.client.expect("client section should be present"); assert_eq!(client.api_key.as_deref(), Some("other-key")); - assert_eq!(client.model.as_deref(), Some("other-model")); assert_eq!( client.base_url.as_deref(), Some("https://other.example.com") ); + assert_eq!(client.model.as_deref(), Some("other-model")); + assert_eq!(client.effort, Some(super::super::Effort::Max)); assert_eq!(client.max_tokens, Some(2000)); + assert_eq!( + client.prompt_cache_ttl, + Some(super::super::PromptCacheTtl::OneHour) + ); let tui = merged.tui.expect("tui section should be present"); assert_eq!(tui.show_thinking, Some(true)); @@ -225,9 +242,11 @@ mod tests { let 
base = FileConfig { client: Some(ClientConfig { api_key: Some("key".to_owned()), - model: Some("model".to_owned()), base_url: Some("https://example.com".to_owned()), + model: Some("model".to_owned()), + effort: Some(super::super::Effort::High), max_tokens: Some(4096), + prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -237,9 +256,14 @@ mod tests { let client = merged.client.expect("client section should survive"); assert_eq!(client.api_key.as_deref(), Some("key")); - assert_eq!(client.model.as_deref(), Some("model")); assert_eq!(client.base_url.as_deref(), Some("https://example.com")); + assert_eq!(client.model.as_deref(), Some("model")); + assert_eq!(client.effort, Some(super::super::Effort::High)); assert_eq!(client.max_tokens, Some(4096)); + assert_eq!( + client.prompt_cache_ttl, + Some(super::super::PromptCacheTtl::FiveMin) + ); let tui = merged.tui.expect("tui section should survive"); assert_eq!(tui.show_thinking, Some(true)); diff --git a/crates/oxide-code/src/model.rs b/crates/oxide-code/src/model.rs index 3bbe421..8d21c4f 100644 --- a/crates/oxide-code/src/model.rs +++ b/crates/oxide-code/src/model.rs @@ -13,23 +13,30 @@ //! an experimental beta than 400 a request. //! //! Capability flags mirror the third-party-gateway branch of the upstream -//! `modelSupports*` predicates: +//! `modelSupports*` predicates (substring rules) and a few client-side +//! additions that come from the migration guide + live packet captures +//! (per-version allowlists): //! //! - `interleaved_thinking` ← `modelSupportsISP` — substring `opus-4` or //! `sonnet-4`. //! - `context_management` ← `modelSupportsContextManagement` — substring //! `opus-4`, `sonnet-4`, or `haiku-4`. -//! - `effort` ← `modelSupportsEffort` — substring `opus-4-6` or -//! `sonnet-4-6`. //! - `context_1m` ← `modelSupports1M` — substring `claude-sonnet-4` or //! `opus-4-6`. +//! 
- `effort` ← `modelSupportsEffort` — substring `opus-4-6` or +//! `sonnet-4-6`. +//! - `effort_max` — explicit allowlist: Opus 4.6, Opus 4.7. +//! - `effort_xhigh` — explicit allowlist: Opus 4.7. //! - `structured_outputs` ← `modelSupportsStructuredOutputs` — explicit //! allowlist: opus-4-1 / 4-5 / 4-6, sonnet-4-5 / 4-6, haiku-4-5. //! //! `capability_flags_match_upstream_predicates` in the test module locks //! every row to the substring predicates above so a mis-bump fails CI -//! loudly. The structured-outputs allowlist is exercised by a separate -//! enumeration test because it doesn't reduce to a substring rule. +//! loudly. Flags that are allowlist-shaped (`effort_max`, `effort_xhigh`, +//! `structured_outputs`) are exercised by per-flag enumeration tests +//! because they don't reduce to a substring rule. + +use crate::config::Effort; /// Metadata and capability flags for a single Claude model. pub(crate) struct ModelInfo { @@ -44,11 +51,11 @@ pub(crate) struct ModelInfo { } /// Per-model feature flags consulted by the API client to gate beta -/// headers. Each flag corresponds to a `modelSupports*` check in the -/// upstream reference: `interleaved_thinking` → `modelSupportsISP`, -/// `context_management` → `modelSupportsContextManagement`, `effort` → -/// `modelSupportsEffort`, `context_1m` → `modelSupports1M`, -/// `structured_outputs` → `modelSupportsStructuredOutputs`. +/// headers and body fields. `interleaved_thinking`, `context_management`, +/// `effort`, `context_1m`, and `structured_outputs` mirror upstream +/// `modelSupports*` predicates; `effort_max` and `effort_xhigh` are +/// client-side allowlists derived from the migration guide and live +/// packet captures. /// /// `context_1m` does not currently drive beta sending — that signal is /// the user-opt-in `[1m]` tag on the model string. The flag is kept for @@ -56,21 +63,27 @@ pub(crate) struct ModelInfo { /// on models that can't honor it. 
#[expect( clippy::struct_excessive_bools, - reason = "five independent capability flags — each maps 1:1 to a \ - separate upstream `modelSupports*` predicate; a bitflag or \ - state-machine refactor would add indirection without any \ - expressiveness gain" + reason = "seven independent capability flags — each maps 1:1 to a \ + separate upstream `modelSupports*` predicate or a \ + per-version allowlist; a bitflag or state-machine refactor \ + would add indirection without any expressiveness gain" )] #[derive(Copy, Clone, Default)] pub(crate) struct Capabilities { pub(crate) interleaved_thinking: bool, pub(crate) context_management: bool, - pub(crate) effort: bool, /// Whether the model accepts the `context-1m-2025-08-07` beta. /// `compute_betas` gates the beta on `has_1m_tag(model) AND /// context_1m` so a user who tags `claude-haiku-4[1m]` doesn't /// silently send an unsupported beta and 400. pub(crate) context_1m: bool, + /// Gates `output_config.effort` at `low` / `medium` / `high`. + /// Upper bound: see [`Self::effort_max`] / [`Self::effort_xhigh`]. + pub(crate) effort: bool, + /// Whether `effort = "max"` is accepted. Opus-only. + pub(crate) effort_max: bool, + /// Whether `effort = "xhigh"` is accepted. Opus 4.7 only. + pub(crate) effort_xhigh: bool, /// Whether the model accepts the `structured-outputs-2025-12-15` /// beta (JSON-schema-constrained text output). The upstream /// allowlist is Opus 4.1/4.5/4.6, Sonnet 4.5/4.6, Haiku 4.5; @@ -93,15 +106,15 @@ pub(crate) struct Capabilities { /// nearest sibling and flip the flags that the upstream predicate(s) /// change. /// -/// The one intentional divergence from the reference: Opus 4.7 -/// postdates the upstream snapshot we have on hand. 
Treating it as -/// 4.6-equivalent (all caps) reflects the monotonic-capability -/// assumption and the confirmed-in-the-wild shape of the model; -/// strictly substring-matching the upstream predicates against 4.7 -/// would wrongly deny `effort` and `1M`, both of which work. +/// The one intentional divergence from the substring-predicate rules: +/// Opus 4.7 postdates the upstream snapshot we have on hand, so it +/// inherits 4.6's monotonic-capability projection for `effort`, +/// `context_management`, and `1M`. 4.7 uniquely adds `effort_xhigh`; +/// `effort_max` is Opus-only per the migration guide (4.6 + 4.7). pub(crate) const MODELS: &[ModelInfo] = &[ - // Upstream predates 4.7; capabilities below are our 4.6-equivalent - // projection, not a reference fact. Validated empirically. + // Upstream predates 4.7; substring-derived flags inherit 4.6 as a + // monotonic projection, and `effort_xhigh` is the one 4.7-only + // addition (rejected as 400 by every other model). ModelInfo { id_substr: "claude-opus-4-7", marketing: "Claude Opus 4.7", @@ -109,8 +122,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: true, context_1m: true, + effort: true, + effort_max: true, + effort_xhigh: true, structured_outputs: true, }, }, @@ -121,8 +136,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: true, context_1m: true, + effort: true, + effort_max: true, + effort_xhigh: false, structured_outputs: true, }, }, @@ -133,8 +150,12 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: true, context_1m: true, + effort: true, + // `max` is Opus-only per the migration guide; Sonnet 4.6 + // 400s on it. 
+ effort_max: false, + effort_xhigh: false, structured_outputs: true, }, }, @@ -145,8 +166,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: false, context_1m: false, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: true, }, }, @@ -157,8 +180,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: false, context_1m: true, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: true, }, }, @@ -173,8 +198,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ // target 3P throughout. interleaved_thinking: false, context_management: true, - effort: false, context_1m: false, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: true, }, }, @@ -185,8 +212,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: false, context_1m: false, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: true, }, }, @@ -201,8 +230,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: false, context_1m: false, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: false, }, }, @@ -216,8 +247,10 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: true, context_management: true, - effort: false, context_1m: true, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: false, }, }, @@ -228,13 +261,62 @@ pub(crate) const MODELS: &[ModelInfo] = &[ capabilities: Capabilities { interleaved_thinking: false, context_management: true, - effort: false, context_1m: false, + effort: false, + effort_max: false, + effort_xhigh: false, structured_outputs: 
false, }, }, ]; +impl Capabilities { + /// Whether the model accepts `output_config.effort = `. + /// Centralises the `low`/`medium`/`high` → [`Self::effort`], + /// `xhigh` → [`Self::effort_xhigh`], `max` → [`Self::effort_max`] + /// mapping so callers don't re-derive it. + fn accepts_effort(self, level: Effort) -> bool { + match level { + Effort::Low | Effort::Medium | Effort::High => self.effort, + Effort::Xhigh => self.effort_xhigh, + Effort::Max => self.effort_max, + } + } + + /// Highest level this model accepts ≤ `pick`. `None` when the + /// model doesn't accept `output_config.effort` at all. Used by + /// [`crate::config::Config::load`] to clamp an out-of-range user + /// pick down to the nearest supported level rather than 400ing + /// the gateway. + pub(crate) fn clamp_effort(self, pick: Effort) -> Option { + if !self.effort { + return None; + } + [ + Effort::Max, + Effort::Xhigh, + Effort::High, + Effort::Medium, + Effort::Low, + ] + .into_iter() + .find(|&level| level <= pick && self.accepts_effort(level)) + } + + /// Per-model default when the user hasn't specified one: `Xhigh` + /// on 4.7 (matches claude-code 2.1.119), `High` on other + /// effort-capable models, `None` otherwise. + pub(crate) fn default_effort(self) -> Option { + if self.effort_xhigh { + Some(Effort::Xhigh) + } else if self.effort { + Some(Effort::High) + } else { + None + } + } +} + /// First-match substring lookup against [`MODELS`]. Returns `None` for /// model strings that don't contain any known family stem (e.g. a future /// `claude-opus-5` before the table is bumped); callers decide whether @@ -243,6 +325,16 @@ pub(crate) fn lookup(model: &str) -> Option<&'static ModelInfo> { MODELS.iter().find(|info| model.contains(info.id_substr)) } +/// Capabilities for `model`, falling back to the all-false +/// [`Capabilities::default`] when the id doesn't match any known row. 
+/// Single entry point for the "unknown model → conservative defaults" +/// invariant so every call site decays the same way. +pub(crate) fn capabilities_for(model: &str) -> Capabilities { + lookup(model) + .map(|info| info.capabilities) + .unwrap_or_default() +} + #[cfg(test)] mod tests { use super::*; @@ -284,8 +376,9 @@ mod tests { // the `MODELS` table will fail here instead of silently // 400-ing one model family on a release day. // - // Structured outputs are an explicit allowlist rather than a - // substring rule, so they're covered by a separate test below. + // Allowlist-shaped flags (`effort_max`, `effort_xhigh`, + // `structured_outputs`) don't reduce to a substring rule, so + // they're covered by per-flag enumeration tests below. // // Opus 4.7 postdates the predicate set we mirror, so we skip // it here — there is no substring rule to check against. @@ -295,13 +388,13 @@ mod tests { } let m = info.id_substr; let is_opus_or_sonnet_4 = m.contains("opus-4") || m.contains("sonnet-4"); - let expect_thinking = is_opus_or_sonnet_4; // haiku-4 is not in modelSupportsISP + let expect_interleaved_thinking = is_opus_or_sonnet_4; // haiku-4 is not in modelSupportsISP let expect_context_management = is_opus_or_sonnet_4 || m.contains("haiku-4"); + let expect_context_1m = m.contains("claude-sonnet-4") || m.contains("opus-4-6"); let expect_effort = m.contains("opus-4-6") || m.contains("sonnet-4-6"); - let expect_one_million = m.contains("claude-sonnet-4") || m.contains("opus-4-6"); assert_eq!( - info.capabilities.interleaved_thinking, expect_thinking, + info.capabilities.interleaved_thinking, expect_interleaved_thinking, "{m}: interleaved_thinking should match modelSupportsISP", ); assert_eq!( @@ -309,29 +402,105 @@ mod tests { "{m}: context_management should match modelSupportsContextManagement", ); assert_eq!( - info.capabilities.effort, expect_effort, - "{m}: effort should match modelSupportsEffort", + info.capabilities.context_1m, expect_context_1m, + "{m}: 
context_1m should match modelSupports1M", ); assert_eq!( - info.capabilities.context_1m, expect_one_million, - "{m}: context_1m should match modelSupports1M", + info.capabilities.effort, expect_effort, + "{m}: effort should match modelSupportsEffort", ); } } #[test] - fn opus_4_7_is_treated_as_4_6_equivalent() { + fn opus_4_7_uniquely_supports_xhigh() { // Upstream predates 4.7 so its predicates wouldn't claim // `effort` or `1M` on this id_substr. We override to the // monotonic-bump projection. Pin it so a well-meaning future // edit that "aligns 4.7 with the predicates" doesn't - // accidentally strip the caps we rely on. + // accidentally strip the caps we rely on. `effort_xhigh` is + // the one 4.7-only addition — every other row must reject it. let caps = lookup("claude-opus-4-7").unwrap().capabilities; assert!(caps.interleaved_thinking); assert!(caps.context_management); - assert!(caps.effort); assert!(caps.context_1m); + assert!(caps.effort); + assert!(caps.effort_max); + assert!(caps.effort_xhigh); assert!(caps.structured_outputs); + + for other in [ + "claude-opus-4-6", + "claude-sonnet-4-6", + "claude-opus-4-5", + "claude-sonnet-4-5", + "claude-haiku-4-5", + "claude-opus-4-1", + ] { + assert!( + !lookup(other).unwrap().capabilities.effort_xhigh, + "{other} must not claim effort_xhigh — it 400s on non-4.7", + ); + } + } + + #[test] + fn effort_max_is_opus_only() { + // `max` effort is Opus-only per the migration guide. Sonnet + // 4.6 supports base `effort` but 400s on `max`; Haiku doesn't + // support `effort` at all. 
+ for supported in ["claude-opus-4-7", "claude-opus-4-6"] { + assert!( + lookup(supported).unwrap().capabilities.effort_max, + "{supported} should claim effort_max", + ); + } + for unsupported in [ + "claude-sonnet-4-6", + "claude-opus-4-5", + "claude-sonnet-4-5", + "claude-haiku-4-5", + "claude-opus-4-1", + "claude-opus-4", + "claude-sonnet-4", + "claude-haiku-4", + ] { + assert!( + !lookup(unsupported).unwrap().capabilities.effort_max, + "{unsupported} must not claim effort_max", + ); + } + } + + // ── Capabilities::clamp_effort ── + + #[test] + fn clamp_effort_picks_highest_supported_at_or_below_user_pick() { + let opus_4_7 = lookup("claude-opus-4-7").unwrap().capabilities; + assert_eq!(opus_4_7.clamp_effort(Effort::Max), Some(Effort::Max)); + assert_eq!(opus_4_7.clamp_effort(Effort::Xhigh), Some(Effort::Xhigh)); + assert_eq!(opus_4_7.clamp_effort(Effort::Low), Some(Effort::Low)); + + // Opus 4.6: Max ✓, Xhigh ✗. `xhigh` clamps down to `high` + // (never sideways-up to `max`). + let opus_4_6 = lookup("claude-opus-4-6").unwrap().capabilities; + assert_eq!(opus_4_6.clamp_effort(Effort::Max), Some(Effort::Max)); + assert_eq!(opus_4_6.clamp_effort(Effort::Xhigh), Some(Effort::High)); + assert_eq!(opus_4_6.clamp_effort(Effort::High), Some(Effort::High)); + + // Sonnet 4.6: Max ✗, Xhigh ✗. Both clamp to `high`. + let sonnet_4_6 = lookup("claude-sonnet-4-6").unwrap().capabilities; + assert_eq!(sonnet_4_6.clamp_effort(Effort::Max), Some(Effort::High)); + assert_eq!(sonnet_4_6.clamp_effort(Effort::Xhigh), Some(Effort::High)); + assert_eq!( + sonnet_4_6.clamp_effort(Effort::Medium), + Some(Effort::Medium) + ); + + // No `effort` at all → None regardless of pick. 
+ let haiku_4_5 = lookup("claude-haiku-4-5").unwrap().capabilities; + assert_eq!(haiku_4_5.clamp_effort(Effort::Max), None); + assert_eq!(haiku_4_5.clamp_effort(Effort::Low), None); } #[test] diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index a134790..86e83cb 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -18,8 +18,10 @@ All fields are optional. Only specify the values you want to override. [client] model = "claude-sonnet-4-6" base_url = "https://api.anthropic.com" -max_tokens = 8192 -# api_key = "sk-ant-..." # prefer the environment variable for secrets +effort = "high" +max_tokens = 32000 +prompt_cache_ttl = "1h" +# api_key = "sk-ant-..." # see Authentication below — env var is safer [tui] show_thinking = true @@ -27,12 +29,43 @@ show_thinking = true ### `[client]` — API connection -| Key | Type | Default | Description | -| ------------ | ------- | --------------------------- | ----------------------- | -| `api_key` | string | — | Anthropic API key | -| `model` | string | `claude-opus-4-7` | Model to use | -| `base_url` | string | `https://api.anthropic.com` | API base URL | -| `max_tokens` | integer | `16384` | Max tokens per response | +| Key | Type | Default | Description | +| ------------------ | ------- | --------------------------- | ----------------------------------- | +| `api_key` | string | — | Anthropic API key | +| `base_url` | string | `https://api.anthropic.com` | API base URL | +| `model` | string | `claude-opus-4-7` | Model to use | +| `effort` | string | per-model (see below) | Intelligence-vs-latency tier | +| `max_tokens` | integer | effort-derived (see below) | Max tokens per response | +| `prompt_cache_ttl` | string | `"1h"` | Prompt-cache TTL (`"5m"` or `"1h"`) | + +#### `effort` — intelligence tier + +`effort` maps 1:1 to the `output_config.effort` body field. Accepted values: `"low"`, `"medium"`, `"high"`, `"xhigh"`, `"max"`. 
Values above a model's per-model ceiling are silently clamped down to the highest supported level (so `"xhigh"` on Sonnet 4.6 becomes `"high"`). Models that don't accept the parameter at all (Sonnet 4.5 and older, Haiku, Opus 4.5 and older) drop it entirely from the request. + +Per-model defaults when `effort` is unset: + +| Model | Default | +| --------------- | ------- | +| Opus 4.7 | `xhigh` | +| Opus 4.6 | `high` | +| Sonnet 4.6 | `high` | +| Everything else | (unset) | + +Tier guide (from the [Opus 4.7 migration guide](https://platform.claude.com/docs/en/about-claude/models/migration-guide)): + +- `max` — deepest reasoning, Opus-only; diminishing returns on some tasks. +- `xhigh` — recommended default for coding and agentic work on Opus 4.7. +- `high` — balanced; minimum recommended for intelligence-sensitive tasks. +- `medium` — cost-sensitive workloads. +- `low` — scoped, latency-sensitive tasks. + +#### `max_tokens` — response ceiling + +When unset, oxide-code derives `max_tokens` from the resolved `effort` to match the claude-code reference: 64 000 for `xhigh` / `max`, 32 000 for `high`, 16 384 otherwise. Setting `max_tokens` explicitly (via TOML or `ANTHROPIC_MAX_TOKENS`) overrides the derivation. + +#### `prompt_cache_ttl` — cache duration + +Accepted values: `"5m"` (matches the server default as of 2026-03-06) and `"1h"` (higher write premium, bigger hit-rate win on long sessions). oxide-code defaults to `"1h"` because Anthropic's silent 2026-03 TTL drop cut typical prompt-caching savings from 80 %+ to 40-55 %. See [Agentic Request Body Fields](../research/anthropic-api.md#agentic-request-body-fields) for the wire shape and cost analysis. 
#### 1M context window — `[1m]` tag @@ -51,6 +84,8 @@ model = "claude-opus-4-7[1m]" | --------------- | ------- | ------- | ---------------------- | | `show_thinking` | boolean | `false` | Show extended thinking | +On Opus 4.7, `show_thinking = true` additionally opts the request into `thinking.display = "summarized"` so the API streams reasoning text; otherwise the 4.7 default (`"omitted"`) applies and the UI sees nothing until the final answer starts. + ## Authentication oxide-code checks three credential sources in order: @@ -58,21 +93,25 @@ oxide-code checks three credential sources in order: 1. `ANTHROPIC_API_KEY` environment variable. 2. `api_key` under `[client]` in a config file. 3. Claude Code OAuth credentials, if [Claude Code](https://code.claude.com/docs) is installed and signed in: - - **macOS** — the `"Claude Code-credentials"` Keychain entry (preferred), falling back to `~/.claude/.credentials.json`. - - **Linux** — `~/.claude/.credentials.json`. + - **macOS** — the `"Claude Code-credentials"` Keychain entry (preferred), falling back to `~/.claude/.credentials.json`. + - **Linux** — `~/.claude/.credentials.json`. + + Expired tokens are refreshed automatically. No configuration needed. - Expired tokens are refreshed automatically. No configuration needed. +Prefer the environment variable (or OAuth) over `api_key` in a config file. `ox.toml` is resolved by walking up from the current directory, so a project-local `ox.toml` is easy to commit by accident; a user-level `~/.config/ox/config.toml` is safer but still plaintext on disk. This matches what Claude Code itself does — Anthropic's CLI reads `ANTHROPIC_API_KEY` and otherwise keeps OAuth tokens in the macOS Keychain where it can. ## Environment variables Environment variables override all config file values. 
-| Variable | Config key | Default | Description | -| ---------------------- | ------------------- | --------------------------- | ----------------------- | -| `ANTHROPIC_API_KEY` | `client.api_key` | — | Anthropic API key | -| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7` | Model to use | -| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | -| `ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | `16384` | Max tokens per response | -| `OX_SHOW_THINKING` | `tui.show_thinking` | `false` | Show extended thinking | +| Variable | Config key | Default | Description | +| ---------------------- | ------------------------- | --------------------------- | ---------------------------- | +| `ANTHROPIC_API_KEY` | `client.api_key` | — | Anthropic API key | +| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | +| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7` | Model to use | +| `ANTHROPIC_EFFORT` | `client.effort` | per-model | Intelligence-vs-latency tier | +| `ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | effort-derived | Max tokens per response | +| `OX_PROMPT_CACHE_TTL` | `client.prompt_cache_ttl` | `1h` | Prompt-cache TTL | +| `OX_SHOW_THINKING` | `tui.show_thinking` | `false` | Show extended thinking | Set `OX_SHOW_THINKING=1` to display the model's thinking process (dimmed text) when extended thinking is enabled for the model. diff --git a/docs/research/anthropic-api.md b/docs/research/anthropic-api.md index 5c75fdc..99ac050 100644 --- a/docs/research/anthropic-api.md +++ b/docs/research/anthropic-api.md @@ -85,7 +85,8 @@ Key rules: - **Haiku one-shots** (title generation, compaction classifier) — strip agentic markers entirely. `claude-code-20250219` is re-added only when the call is agentic. 
- **`prompt-caching-scope` requires a 1P base URL** — the beta only matters when a block carries `cache_control.scope: "global"`, which 3P gateways reject (see [Prompt Caching Scope](#prompt-caching-scope)). oxide-code gates the header on `is_first_party_base_url()` so requests going through a proxy ship neither the scope field nor its beta. - **`context-1m` is user opt-in via `[1m]`** — appending `[1m]` to the model string (e.g., `claude-opus-4-7[1m]`) adds the 1M beta and strips the tag before the request hits the wire. Family-based auto-enable would 400 on subscriptions or gateways that don't carry 1M access. Convention matches claude-code. -- **`effort` is Opus 4.6+ and Sonnet 4.6+ only** — Opus 4.5 and older, Sonnet 4.5 and older, and all Haiku variants reject it per upstream's `modelSupportsEffort`. +- **`effort` is Opus 4.6+ and Sonnet 4.6+ only** — Opus 4.5 and older, Sonnet 4.5 and older, and all Haiku variants reject it per upstream's `modelSupportsEffort`. The per-level ceiling (`xhigh` on 4.7, `max` on Opus 4.6 / 4.7) is separately encoded in `Capabilities::effort_xhigh` / `effort_max`. +- **`effort` and `context-management` betas need a body field.** Sending the header alone is a silent no-op — the request runs at the server default. See [Agentic Request Body Fields](#agentic-request-body-fields) for the matching `output_config.effort` and `context_management.edits` shapes. oxide-code pairs each capability with both its beta and its body field so the two stay in sync. - **`structured-outputs` is per-version and caller-opt-in** — the upstream allowlist is Opus 4.1 / 4.5 / 4.6+, Sonnet 4.5 / 4.6+, Haiku 4.5. The beta ships only when a caller supplies an `output_config.format` (today: the AI-title generator). The body field and header are paired on the same capability flag: a schema passed to an unsupported model silently falls back to free-form text, mirroring the `[1m]` × `context_1m` silent-strip pattern. 
 - **Unknown model aliases** fall through substring matching on the family stem. `claude-opus-5-x` would miss every row and ship with only the identity / caching betas; bump the `MODELS` table when a new family lands.
@@ -214,6 +215,59 @@ The shape is otherwise identical in both modes: same static / dynamic section sp
 
 This matches the broader pattern of gating features like fine-grained tool streaming and client-request-ID injection on base URL rather than on the provider enum alone — the provider flag says "not Bedrock / not Vertex", but a user pointing `ANTHROPIC_BASE_URL` at a proxy still parses as first-party by that check.
 
+## Agentic Request Body Fields
+
+Some capabilities live in the request body. For the beta-gated ones, the body field travels alongside (not instead of) the `anthropic-beta` header: shipping the header while omitting the body field is a silent no-op — the feature doesn't activate. (`cache_control.ttl` and `thinking.display` below are GA and need no header at all.) The field shapes below were captured live from `claude-code 2.1.119` and cross-checked against the official migration guide.
+
+### `output_config.effort`
+
+GA as of Opus 4.6. Controls the intelligence-vs-latency tier of agentic turns via one of five tokens: `low`, `medium`, `high`, `xhigh`, `max`.
+
+```json
+{
+  "output_config": { "effort": "xhigh" }
+}
+```
+
+- **The `effort-2025-11-24` beta header is necessary but not sufficient.** oxide-code used to send the header without the body field; the header became a no-op and the model ran at an undefined default.
+- **Per-model ceiling.** `max` is Opus-only; Sonnet 4.6 400s on it. `xhigh` is Opus 4.7-only. The `Capabilities::effort_max` / `effort_xhigh` flags encode this; `Capabilities::clamp_effort` clamps a user pick down to the highest supported level at or below it.
+- **Per-model default.** claude-code 2.1.119 sends `xhigh` on Opus 4.7, sends `high` on Opus 4.6 and Sonnet 4.6, and omits the field entirely on earlier models. oxide-code mirrors this via `Capabilities::default_effort`.
+- **`max_tokens` should scale with effort.** claude-code uses 64K on Opus 4.7 at `xhigh`, 32K on Sonnet 4.6 at `high`. oxide-code's `default_max_tokens(effort)` applies the same scaling when the user hasn't set `ANTHROPIC_MAX_TOKENS` explicitly.
+
+### `context_management.edits`
+
+Partners the `context-management-2025-06-27` beta header. claude-code ships the same directive on every 4.6+ request:
+
+```json
+{
+  "context_management": {
+    "edits": [{ "type": "clear_thinking_20251015", "keep": "all" }]
+  }
+}
+```
+
+oxide-code applies the body-header coupling as an invariant: the body field is populated on every request whose model has `Capabilities::context_management` set, i.e. the same condition that enables the beta header. One-shot completions (the `complete` path in `client::anthropic`) skip both — matches the reference wire and keeps the title-generation path minimal.
+
+### `cache_control.ttl`
+
+Anthropic silently dropped the default ephemeral-cache TTL from `1h` to `5m` on 2026-03-06 — a 40–55% savings regression on any session longer than 5 minutes. The opt-in is a body field, not a beta:
+
+```json
+{
+  "cache_control": { "type": "ephemeral", "ttl": "1h" }
+}
+```
+
+Accepted values: `"5m"` (server default, equivalent to omitting the field) and `"1h"` (opt-in at higher write premium). No beta header is required — the field is GA.
+
+**oxide-code default.** `prompt_cache_ttl = "1h"`. The hit-rate recovery on real agent sessions (tool-use loops, resumed conversations) dominates the write premium, so `1h` is the right safe default. Users opt down via `[client].prompt_cache_ttl = "5m"` or `OX_PROMPT_CACHE_TTL=5m`.
+
+Invalidation order (from the Anthropic caching docs) is `tools → system → messages` — any change at a level busts that level and every level after it. oxide-code attaches a single `cache_control` to the static system-prompt prefix block (scope-gated on 1P / 3P per the previous section); the TTL rides through on both paths.
+ +### `thinking.display` + +See [Extended Thinking § Display modes (Opus 4.7+)](./extended-thinking.md#display-modes-opus-47). Opus 4.7 silently flipped the default to `"omitted"`; `show_thinking=true` in oxide-code opts back into `"summarized"`. + ## Third-Party Tool Restrictions As of April 4, 2026, Anthropic enforces that OAuth subscription credits (Pro / Max) are only valid for official Claude Code and claude.ai clients. Third-party tools that reuse the OAuth flow are classified as "third-party harness traffic" and must use either: @@ -267,3 +321,5 @@ oxide-code implements the same refresh flow: proactive refresh with the 5-minute - `claude-code/src/utils/secureStorage/index.ts` — platform-specific storage dispatch - `claude-code/src/utils/secureStorage/macOsKeychainStorage.ts` — macOS Keychain backend - `claude-code/src/utils/secureStorage/plainTextStorage.ts` — credential file I/O + +Body-field research is empirical rather than source-backed: the `output_config.effort`, `context_management.edits`, and `cache_control.ttl` wire shapes documented above were captured live from a `claude-code --bare -p --model claude-opus-4-7` session against a local SSE proxy on 2026-04-24 and cross-referenced with the [Opus 4.7 migration guide](https://platform.claude.com/docs/en/about-claude/models/migration-guide) and [Anthropic prompt-caching docs](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching). diff --git a/docs/research/extended-thinking.md b/docs/research/extended-thinking.md index afdb593..554e16d 100644 --- a/docs/research/extended-thinking.md +++ b/docs/research/extended-thinking.md @@ -43,6 +43,27 @@ When thinking is enabled, `temperature` must be omitted from the request (API re - `interleaved-thinking-2025-05-14` — enables thinking blocks interleaved with text / tool_use. - Without this header, thinking blocks appear only at the start of the response. 
+### Display modes (Opus 4.7+)
+
+Opus 4.7 adds a `thinking.display` field with two wire values:
+
+| Value          | Meaning                                                                |
+| -------------- | ---------------------------------------------------------------------- |
+| `"summarized"` | Thinking blocks stream summarized reasoning text.                      |
+| `"omitted"`    | Thinking blocks still ship, but their `thinking` text is empty (`""`). |
+
+**Silent default change.** On Opus 4.6, the server defaulted to `"summarized"`. On Opus 4.7 the default is `"omitted"` — any UI that renders streaming reasoning (including oxide-code's `show_thinking` TUI mode) sees a long pause followed by the final answer unless it opts back in:
+
+```json
+{
+  "thinking": { "type": "adaptive", "display": "summarized" }
+}
+```
+
+Older models (4.6, 4.5) accept the field and ignore it, so sending it unconditionally is safe when the caller wants summarized output. oxide-code couples `display` to `config.show_thinking`: `Some(Summarized)` when the TUI is set up to render reasoning, `None` (field absent) otherwise. The `None` path preserves the pre-4.7 wire shape and lets 4.7's `omitted` default do what it says.
+
+No beta header gates `display` — it's GA on 4.7.
+
 ## Thinking Block Lifecycle
 
 ### Streaming
@@ -85,7 +106,7 @@ Every `thinking` block includes a `signature` field received via `signature_delt
 
 Claude Code handles credential rotation in `stripSignatureBlocks()`, which removes all thinking / redacted_thinking blocks when the active credential changes.
 
-oxide-code implements the full thinking data pipeline: typed `Thinking`, `RedactedThinking`, and `ServerToolUse` content blocks with proper streaming accumulation, signature handling, round-trip preservation, and trailing thinking stripping with placeholder insertion. Adaptive thinking is enabled by default. Credential rotation stripping is not yet implemented (depends on Keychain OAuth support).
+oxide-code implements the full thinking data pipeline: typed `Thinking`, `RedactedThinking`, and `ServerToolUse` content blocks with proper streaming accumulation, signature handling, round-trip preservation, and trailing thinking stripping with placeholder insertion. Adaptive thinking is enabled by default; `thinking.display` is set to `"summarized"` whenever the TUI's `show_thinking` flag is on (and omitted otherwise so 4.7's `"omitted"` default applies). Credential rotation stripping is not yet implemented (depends on Keychain OAuth support). ## Sources