diff --git a/package.json b/package.json
index ef23eac908..f2c394691b 100644
--- a/package.json
+++ b/package.json
@@ -38,7 +38,7 @@
     "lint:docs": "node scripts/validate-package-readmes.mjs",
     "lint:manifests": "node scripts/validate-package-manifests.mjs",
     "lint:workflows": "node scripts/lint-workflow-triggers.mjs",
-    "test:scripts": "node --test scripts/lint-workflow-triggers.test.mjs scripts/validate-skills.test.mjs scripts/determine-version-utils.test.ts scripts/check-upgrade-coverage.test.mjs scripts/set-version-utils.test.ts scripts/check-publish-deps-pn-pins.test.mjs scripts/publish-packages-utils.test.mjs scripts/check-clean-tree.test.mjs scripts/lint-casts.test.mjs scripts/sync-agent-rules.test.mjs skills-contrib/drive-diagnose-run/test/load.test.ts skills-contrib/drive-diagnose-run/test/metrics.test.ts skills-contrib/drive-diagnose-run/test/invariants.test.ts skills-contrib/drive-diagnose-run/test/cascade-brief.test.ts skills-contrib/drive-diagnose-run/test/report.test.ts skills-contrib/drive-diagnose-run/test/posthoc.test.ts skills-contrib/drive-diagnose-run/test/scorecard.test.ts skills-contrib/drive-record-traces/test/emit.test.ts skills-contrib/drive-judge-harness/test/usage.test.ts skills-contrib/drive-judge-harness/test/manifest.test.ts skills-contrib/drive-judge-harness/test/load-brief.test.ts skills-contrib/drive-judge-harness/test/run-one-brief.test.ts skills-contrib/drive-judge-harness/test/validate-parser.test.ts skills-contrib/drive-judge-harness/test/judge-model-sdk.test.ts skills-contrib/drive-judge-harness/test/rubric-correctness.test.ts skills-contrib/drive-judge-harness/test/classify-failure.test.ts skills-contrib/drive-judge-harness/test/classify-operator.test.ts skills-contrib/drive-judge-harness/test/emit-correctness.test.ts skills-contrib/drive-judge-harness/test/calibration.test.ts skills-contrib/drive-judge-harness/test/prepare-run.test.ts skills-contrib/drive-judge-harness/test/collect-run.test.ts skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts skills-contrib/drive-judge-harness/test/run-arm.test.ts",
+    "test:scripts": "node --test scripts/lint-workflow-triggers.test.mjs scripts/validate-skills.test.mjs scripts/determine-version-utils.test.ts scripts/check-upgrade-coverage.test.mjs scripts/set-version-utils.test.ts scripts/check-publish-deps-pn-pins.test.mjs scripts/publish-packages-utils.test.mjs scripts/check-clean-tree.test.mjs scripts/lint-casts.test.mjs scripts/sync-agent-rules.test.mjs skills-contrib/drive-diagnose-run/test/load.test.ts skills-contrib/drive-diagnose-run/test/metrics.test.ts skills-contrib/drive-diagnose-run/test/invariants.test.ts skills-contrib/drive-diagnose-run/test/cascade-brief.test.ts skills-contrib/drive-diagnose-run/test/report.test.ts skills-contrib/drive-diagnose-run/test/posthoc.test.ts skills-contrib/drive-diagnose-run/test/scorecard.test.ts skills-contrib/drive-record-traces/test/emit.test.ts skills-contrib/drive-judge-harness/test/usage.test.ts skills-contrib/drive-judge-harness/test/manifest.test.ts skills-contrib/drive-judge-harness/test/load-brief.test.ts skills-contrib/drive-judge-harness/test/run-one-brief.test.ts skills-contrib/drive-judge-harness/test/sdk-events.test.ts skills-contrib/drive-judge-harness/test/claude-events.test.ts skills-contrib/drive-judge-harness/test/validate-parser.test.ts skills-contrib/drive-judge-harness/test/judge-model-sdk.test.ts skills-contrib/drive-judge-harness/test/rubric-correctness.test.ts skills-contrib/drive-judge-harness/test/classify-failure.test.ts skills-contrib/drive-judge-harness/test/classify-operator.test.ts skills-contrib/drive-judge-harness/test/emit-correctness.test.ts skills-contrib/drive-judge-harness/test/calibration.test.ts skills-contrib/drive-judge-harness/test/prepare-run.test.ts skills-contrib/drive-judge-harness/test/collect-run.test.ts skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts skills-contrib/drive-judge-harness/test/run-arm.test.ts",
     "drive:diagnose": "node skills-contrib/drive-diagnose-run/cli.ts",
     "drive:emit": "node skills-contrib/drive-record-traces/emit.ts",
     "drive:run-brief": "node skills-contrib/drive-judge-harness/run-one-brief.ts",
@@ -59,6 +59,7 @@
     "prepare": "husky && skills add ./skills-contrib --skill '*' --agent universal claude-code -y && node scripts/sync-agent-rules.mjs"
   },
   "devDependencies": {
+    "@anthropic-ai/claude-agent-sdk": "^0.3.158",
     "@biomejs/biome": "2.4.15",
     "@cursor/sdk": "^1.0.15",
     "@prisma-next/tsconfig": "workspace:0.11.0",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 94f23bdd90..e5541db452 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -58,6 +58,9 @@ importers:
 
   .:
     devDependencies:
+      '@anthropic-ai/claude-agent-sdk':
+        specifier: ^0.3.158
+        version: 0.3.158(@anthropic-ai/sdk@0.100.1(zod@3.25.76))(@modelcontextprotocol/sdk@1.29.0(zod@3.25.76))(zod@3.25.76)
       '@biomejs/biome':
         specifier: 2.4.15
         version: 2.4.15
@@ -4572,6 +4575,67 @@ packages:
     resolution: {integrity: sha512-UrcABB+4bUrFABwbluTIBErXwvbsU/V7TZWfmbgJfbkwiBuziS9gxdODUyuiecfdGQ85jglMW6juS3+z5TsKLw==}
     engines: {node: '>=10'}
 
+  '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.158':
+    resolution: {integrity: sha512-9mlkVHeHIiF7oJUjVHbieYgsTzGmKRcgUmp52BhUaDL40Gm5AC0Lotqn0ULniqlr6pNWcbA0+gjEwg7VI9VtSA==}
+    cpu: [arm64]
+    os: [darwin]
+
+  '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.158':
+    resolution: {integrity: sha512-3S4ef/f2ksTmUSEK6Di9Vch2Fm5udmZq8kVKO8mAdLV+VuG6KW9kYBzbogDtwYIZFuFo8xs1sPGP2hsdZvghhA==}
+    cpu: [x64]
+    os: [darwin]
+
+  '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.158':
+    resolution: {integrity: sha512-lJ2ZKKirs/RTAU+9IYTd+3CKE4vYe694FkRZ04TBQPiq/ujRUa3vmGm6gSIdmDlrdYMX5j4rdcun+Ym6mPXTtA==}
+    cpu: [arm64]
+    os: [linux]
+    libc: [musl]
+
+  '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.158':
+    resolution: {integrity: sha512-ut9uJclBqrH5NhAuVc0zN84eQM3MP4DTQqh12eVUx83eekHu7l1v6Bg+N5P/m4SM4tEhKl8lQjLpliPtML4lUA==}
+    cpu: [arm64]
+    os: [linux]
+    libc: [glibc]
+
+  '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.158':
+    resolution: {integrity: sha512-cU0NOOA9B8I6E58HejqtO/vsYg3rfWgoaDmrJ1BzM5J4eNS3iSeaxDm7MzcyvEbTHPC1Qgj89XoiDHqhf/V/vg==}
+    cpu: [x64]
+    os: [linux]
+    libc: [musl]
+
+  '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.158':
+    resolution: {integrity: sha512-PqcDGFuzvFA0JPYa11Xcoga13oQbbAGibfASmZG5+dhoq8SniUCj0LkGGnVAgTqX4SQIIMYklS6l7egwkJIi3w==}
+    cpu: [x64]
+    os: [linux]
+    libc: [glibc]
+
+  '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.158':
+    resolution: {integrity: sha512-47S9BUuNOYuUGaMe9ZUaRMfd1UVRt1iP9UwHWqCJUsrTPNnTCY/7lW7aecEr7Z/h3JctegTvx6Iy+mp697R1hQ==}
+    cpu: [arm64]
+    os: [win32]
+
+  '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.158':
+    resolution: {integrity: sha512-YLjoU6Y+WN2nqnafbbEoVd+1ISaz2lHpArTnE+sNO8hOokBLEwAHOWd8uRv9c9CSMCUEyMwvIQ7ANOEXG6NsdQ==}
+    cpu: [x64]
+    os: [win32]
+
+  '@anthropic-ai/claude-agent-sdk@0.3.158':
+    resolution: {integrity: sha512-Rht8Ui7HBsVdBCG6SYs9b+JmJWAVoDXPD2pWNVMSFrzyAS4nizwdz3HtUnAobFumgzbT3LbpWzHdLfUDu4gM4w==}
+    engines: {node: '>=18.0.0'}
+    peerDependencies:
+      '@anthropic-ai/sdk': '>=0.93.0'
+      '@modelcontextprotocol/sdk': ^1.29.0
+      zod: ^4.0.0
+
+  '@anthropic-ai/sdk@0.100.1':
+    resolution: {integrity: sha512-RANcEe7LpiLczkKGOwoXOTuFdPhuubS0i4xaAKOMpcqc55YO0mukgxppV7eygx3DXNjxWT6RYOLPyOy0aIAmwg==}
+    hasBin: true
+    peerDependencies:
+      zod: ^3.25.0 || ^4.0.0
+    peerDependenciesMeta:
+      zod:
+        optional: true
+
   '@ark/schema@0.56.0':
     resolution: {integrity: sha512-ECg3hox/6Z/nLajxXqNhgPtNdHWC9zNsDyskwO28WinoFEnWow4IsERNz9AnXRhTZJnYIlAJ4uGn3nlLk65vZA==}
 
@@ -5711,6 +5775,16 @@ packages:
   '@mjackson/node-fetch-server@0.2.0':
     resolution: {integrity: sha512-EMlH1e30yzmTpGLQjlFmaDAjyOeZhng1/XCd7DExR8PNAnG/G1tyruZxEoUe11ClnwGhGrtsdnyyUx1frSzjng==}
 
+  '@modelcontextprotocol/sdk@1.29.0':
+    resolution: {integrity: sha512-zo37mZA9hJWpULgkRpowewez1y6ML5GsXJPY8FI0tBBCd77HEvza4jDqRKOXgHNn867PVGCyTdzqpz0izu5ZjQ==}
+    engines: {node: '>=18'}
+    peerDependencies:
+      '@cfworker/json-schema': ^4.1.1
+      zod: ^3.25 || ^4.0
+    peerDependenciesMeta:
+      '@cfworker/json-schema':
+        optional: true
+
   '@mongodb-js/saslprep@1.4.11':
     resolution: {integrity: sha512-o9rAHc0IpIjuPSxRutWpE1F62x7n+4mVS4rCNHkzhIUMQcc18bb6xEq5wd2NdN0WjepIyXIppRshYI2kQDOZVA==}
 
@@ -6604,6 +6678,9 @@ packages:
   '@speed-highlight/core@1.2.15':
     resolution: {integrity: sha512-BMq1K3DsElxDWawkX6eLg9+CKJrTVGCBAWVuHXVUV2u0s2711qiChLSId6ikYPfxhdYocLNt3wWwSvDiTvFabw==}
 
+  '@stablelib/base64@1.0.1':
+    resolution: {integrity: sha512-1bnPQqSxSuc3Ii6MhBysoWCg58j97aUjuCSZrGSmDxNqtytIi0k8utUenAwTZN4V5mXXYGsVUI9zeBqy+jBOSQ==}
+
   '@standard-schema/spec@1.1.0':
     resolution: {integrity: sha512-l2aFy5jALhniG5HgqrD6jXLi/rUWrKvqN/qJx6yoJsgKhblVd+iqqU4RCXavm/jPityDo5TCvKMnpjKnOriy0w==}
 
@@ -6965,6 +7042,10 @@ packages:
     resolution: {integrity: sha512-PYAthTa2m2VKxuvSD3DPC/Gy+U+sOA1LAuT8mkmRuvw+NACSaeXEQ+NHcVF7rONl6qcaxV3Uuemwawk+7+SJLw==}
     engines: {node: '>= 0.6'}
 
+  accepts@2.0.0:
+    resolution: {integrity: sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==}
+    engines: {node: '>= 0.6'}
+
   acorn-jsx-walk@2.0.0:
     resolution: {integrity: sha512-uuo6iJj4D4ygkdzd6jPtcxs8vZgDX9YFIkqczGImoypX2fQ4dVImmu3UzA4ynixCIMTrEOWW+95M2HuBaCEOVA==}
 
@@ -7002,6 +7083,14 @@ packages:
     resolution: {integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==}
     engines: {node: '>=8'}
 
+  ajv-formats@3.0.1:
+    resolution: {integrity: sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==}
+    peerDependencies:
+      ajv: ^8.0.0
+    peerDependenciesMeta:
+      ajv:
+        optional: true
+
   ajv@8.20.0:
     resolution: {integrity: sha512-Thbli+OlOj+iMPYFBVBfJ3OmCAnaSyNn4M1vz9T6Gka5Jt9ba/HIR56joy65tY6kx/FCF5VXNB819Y7/GUrBGA==}
 
@@ -7170,6 +7259,10 @@ packages:
     resolution: {integrity: sha512-3grm+/2tUOvu2cjJkvsIxrv/wVpfXQW4PsQHYm7yk4vfpu7Ekl6nEsYBoJUL6qDwZUx8wUhQ8tR2qz+ad9c9OA==}
     engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16}
 
+  body-parser@2.2.2:
+    resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
+    engines: {node: '>=18'}
+
   brace-expansion@1.1.15:
     resolution: {integrity: sha512-EwOCDEex4quD37XhqM3omwtMoJjr//isUZz1JopUNWms+4Z2ViyM/k1YIRePpoVNnQhENnxtFjLaxNHrT7xIUg==}
 
@@ -7346,16 +7439,28 @@ packages:
     resolution: {integrity: sha512-FveZTNuGw04cxlAiWbzi6zTAL/lhehaWbTtgluJh4/E95DqMwTmha3KZN1aAWA8cFIhHzMZUvLevkw5Rqk+tSQ==}
     engines: {node: '>= 0.6'}
 
+  content-disposition@1.1.0:
+    resolution: {integrity: sha512-5jRCH9Z/+DRP7rkvY83B+yGIGX96OYdJmzngqnw2SBSxqCFPd0w2km3s5iawpGX8krnwSGmF0FW5Nhr0Hfai3g==}
+    engines: {node: '>=18'}
+
   content-type@1.0.5:
     resolution: {integrity: sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==}
     engines: {node: '>= 0.6'}
 
+  content-type@2.0.0:
+    resolution: {integrity: sha512-j/O/d7GcZCyNl7/hwZAb606rzqkyvaDctLmckbxLzHvFBzTJHuGEdodATcP3yIRoDrLHkIATJuvzbFlp/ki2cQ==}
+    engines: {node: '>=18'}
+
   convert-source-map@2.0.0:
     resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
 
   cookie-signature@1.0.7:
     resolution: {integrity: sha512-NXdYc3dLr47pBkpUCHtKSwIOQXLVn8dZEuywboCOJY/osA0wFSLlSawr3KN8qXJEyX66FcONTH8EIlVuK0yyFA==}
 
+  cookie-signature@1.2.2:
+    resolution: {integrity: sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==}
+    engines: {node: '>=6.6.0'}
+
   cookie@0.7.2:
     resolution: {integrity: sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==}
     engines: {node: '>= 0.6'}
@@ -7364,6 +7469,10 @@ packages:
     resolution: {integrity: sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==}
     engines: {node: '>=18'}
 
+  cors@2.8.6:
+    resolution: {integrity: sha512-tJtZBBHA6vjIAaF6EnIaq6laBBP9aq/Y3ouVJjEfoHbRBcHBAHYcMh/w8LDrk2PvIMMq8gmopa5D4V8RmbrxGw==}
+    engines: {node: '>= 0.10'}
+
   cross-spawn@7.0.6:
     resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
     engines: {node: '>= 8'}
@@ -7595,6 +7704,14 @@ packages:
   events-universal@1.0.1:
     resolution: {integrity: sha512-LUd5euvbMLpwOF8m6ivPCbhQeSiYVNb8Vs0fQ8QjXo0JTkEHpz8pxdQf0gStltaPpw0Cca8b39KxvK9cfKRiAw==}
 
+  eventsource-parser@3.1.0:
+    resolution: {integrity: sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg==}
+    engines: {node: '>=18.0.0'}
+
+  eventsource@3.0.7:
+    resolution: {integrity: sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==}
+    engines: {node: '>=18.0.0'}
+
   evlog@1.9.0:
     resolution: {integrity: sha512-Dzv4drz+MydyZlLok2ATc1O4WBBDEh0+mNl2Tk3NePdaHWgmvCYYovOQgXycxn7NOSv2acRqXHfUlbP6A3rdGQ==}
     peerDependencies:
@@ -7627,10 +7744,20 @@ packages:
     resolution: {integrity: sha512-knvyeauYhqjOYvQ66MznSMs83wmHrCycNEN6Ao+2AeYEfxUIkuiVxdEa1qlGEPK+We3n0THiDciYSsCcgW/DoA==}
     engines: {node: '>=12.0.0'}
 
+  express-rate-limit@8.5.2:
+    resolution: {integrity: sha512-5Kb34ipNX694DH48vN9irak1Qx30nb0PLYHXfJgw4YEjiC3ZEmZJhwOp+VfiCYwFzvFTdB9QkArYS5kXa2cx2A==}
+    engines: {node: '>= 16'}
+    peerDependencies:
+      express: '>= 4.11'
+
   express@4.22.1:
     resolution: {integrity: sha512-F2X8g9P1X7uCPZMA3MVf9wcTqlyNp7IhH5qPCI0izhaOIYXaW9L535tGA3qmjRzpH+bZczqq7hVKxTR4NWnu+g==}
     engines: {node: '>= 0.10.0'}
 
+  express@5.2.1:
+    resolution: {integrity: sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==}
+    engines: {node: '>= 18'}
+
   exsolve@1.0.8:
     resolution: {integrity: sha512-LmDxfWXwcTArk8fUEnOfSZpHOJ6zOMUJKOtFLFqJLoKJetuQG874Uc7/Kki7zFLzYybmZhp1M7+98pfMqeX8yA==}
 
@@ -7644,6 +7771,9 @@ packages:
   fast-fifo@1.3.2:
     resolution: {integrity: sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==}
 
+  fast-sha256@1.3.0:
+    resolution: {integrity: sha512-n11RGP/lrWEFI/bWdygLxhI+pVeo1ZYIVwvvPkW7azl/rOy+F3HYRZ2K5zeE9mmkhQppyv9sQFx0JM9UabnpPQ==}
+
   fast-string-truncated-width@3.0.3:
     resolution: {integrity: sha512-0jjjIEL6+0jag3l2XWWizO64/aZVtpiGE3t0Zgqxv0DPuxiMjvB3M24fCyhZUO4KomJQPj3LTSUnDP3GpdwC0g==}
 
@@ -7672,6 +7802,10 @@ packages:
     resolution: {integrity: sha512-aA4RyPcd3badbdABGDuTXCMTtOneUCAYH/gxoYRTZlIJdF0YPWuGqiAsIrhNnnqdXGswYk6dGujem4w80UJFhg==}
     engines: {node: '>= 0.8'}
 
+  finalhandler@2.1.1:
+    resolution: {integrity: sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==}
+    engines: {node: '>= 18.0.0'}
+
   find-cache-dir@3.3.2:
     resolution: {integrity: sha512-wXZV5emFEjrridIgED11OoUKLxiYjAcqot/NJdAkOhlJ+vGzwhOAfcG5OX1jP+S0PcjEn8bdMJv+g2jwQ3Onig==}
     engines: {node: '>=8'}
@@ -7701,6 +7835,10 @@ packages:
     resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==}
     engines: {node: '>= 0.6'}
 
+  fresh@2.0.0:
+    resolution: {integrity: sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==}
+    engines: {node: '>= 0.8'}
+
   fs-constants@1.0.0:
     resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==}
 
@@ -7863,6 +8001,10 @@ packages:
     resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
     engines: {node: '>=0.10.0'}
 
+  iconv-lite@0.7.2:
+    resolution: {integrity: sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==}
+    engines: {node: '>=0.10.0'}
+
   ieee754@1.2.1:
     resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
 
@@ -7944,6 +8086,9 @@ packages:
   is-potential-custom-element-name@1.0.1:
     resolution: {integrity: sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==}
 
+  is-promise@4.0.0:
+    resolution: {integrity: sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==}
+
   isarray@2.0.5:
     resolution: {integrity: sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==}
 
@@ -7970,6 +8115,9 @@ packages:
     resolution: {integrity: sha512-AC/7JofJvZGrrneWNaEnJeOLUx+JlGt7tNa0wZiRPT4MY1wmfKjt2+6O2p2uz2+skll8OZZmJMNqeke7kKbNgQ==}
     hasBin: true
 
+  jose@6.2.3:
+    resolution: {integrity: sha512-YYVDInQKFJfR/xa3ojUTl8c2KoTwiL1R5Wg9YCydwH0x0B9grbzlg5HC7mMjCtUJjbQ/YnGEZIhI5tCgfTb4Hw==}
+
   js-tokens@10.0.0:
     resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==}
 
@@ -7999,9 +8147,16 @@ packages:
     engines: {node: '>=6'}
     hasBin: true
 
+  json-schema-to-ts@3.1.1:
+    resolution: {integrity: sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g==}
+    engines: {node: '>=16'}
+
   json-schema-traverse@1.0.0:
     resolution: {integrity: sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==}
 
+  json-schema-typed@8.0.2:
+    resolution: {integrity: sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==}
+
   json-stable-stringify@1.3.0:
     resolution: {integrity: sha512-qtYiSSFlwot9XHtF9bD9c7rwKjr+RecWT//ZnPvSmEjpV5mmPOCN4j8UjY5hbjNkOwZ/jQv3J6R1/pL7RwgMsg==}
     engines: {node: '>= 0.4'}
@@ -8179,12 +8334,20 @@ packages:
     resolution: {integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ==}
     engines: {node: '>= 0.6'}
 
+  media-typer@1.1.0:
+    resolution: {integrity: sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==}
+    engines: {node: '>= 0.8'}
+
   memory-pager@1.5.0:
     resolution: {integrity: sha512-ZS4Bp4r/Zoeq6+NLJpP+0Zzm0pR8whtGPf1XExKLJBAczGMnSi3It14OiNCStjQjM6NU1okjQGSxgEZN8eBYKg==}
 
   merge-descriptors@1.0.3:
     resolution: {integrity: sha512-gaNvAS7TZ897/rVaZ0nMtAyxNyi/pdbjbAwUpFQpN70GqnVfOiXpeUUMKRBmzXaSQ8DdTX4/0ms62r2K+hE6mQ==}
 
+  merge-descriptors@2.0.0:
+    resolution: {integrity: sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==}
+    engines: {node: '>=18'}
+
   methods@1.1.2:
     resolution: {integrity: sha512-iclAHeNqNm68zFtnZ0e+1L2yUIdvzNoauKU4WBA3VvH/vPFieF7qfRlwUZU+DA9P9bPXIS90ulxoUoCH23sV2w==}
     engines: {node: '>= 0.6'}
@@ -8201,6 +8364,10 @@ packages:
     resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==}
     engines: {node: '>= 0.6'}
 
+  mime-types@3.0.2:
+    resolution: {integrity: sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==}
+    engines: {node: '>=18'}
+
   mime@1.6.0:
     resolution: {integrity: sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==}
     engines: {node: '>=4'}
@@ -8352,6 +8519,10 @@ packages:
     resolution: {integrity: sha512-myRT3DiWPHqho5PrJaIRyaMv2kgYf0mUVgBNOYMuCH5Ki1yEiQaf/ZJuQ62nvpc44wL5WDbTX7yGJi1Neevw8w==}
     engines: {node: '>= 0.6'}
 
+  negotiator@1.0.0:
+    resolution: {integrity: sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==}
+    engines: {node: '>= 0.6'}
+
   new-find-package-json@2.0.0:
     resolution: {integrity: sha512-lDcBsjBSMlj3LXH2v/FW3txlh2pYTjmbOXPYJD93HI5EwuLzI11tdHSIpUMmfq/IOsldj4Ps8M8flhm+pCK4Ew==}
     engines: {node: '>=12.22.0'}
@@ -8402,6 +8573,10 @@ packages:
     engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0}
     deprecated: This package is no longer supported.
 
+  object-assign@4.1.1:
+    resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
+    engines: {node: '>=0.10.0'}
+
   object-hash@2.2.0:
     resolution: {integrity: sha512-gScRMn0bS5fH+IuwyIFgnh9zBdo4DV+6GhygmWM9HyNJSgS0hScp1f5vjtm7oIIOiT9trXrShAkLFSc2IqKNgw==}
     engines: {node: '>= 6'}
@@ -8496,6 +8671,9 @@ packages:
   path-to-regexp@6.3.0:
     resolution: {integrity: sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==}
 
+  path-to-regexp@8.4.2:
+    resolution: {integrity: sha512-qRcuIdP69NPm4qbACK+aDogI5CBDMi1jKe0ry5rSQJz8JVLsC7jV8XpiJjGRLLol3N+R5ihGYcrPLTno6pAdBA==}
+
   pathe@1.1.2:
     resolution: {integrity: sha512-whLdWMYL2TwI08hn8/ZqAbrVemu0LNaNNJZX73O6qaIdCTfXutsLhMkjdENX0qhsQ9uIimo4/aQOmXkoon2nDQ==}
 
@@ -8592,6 +8770,10 @@ packages:
     resolution: {integrity: sha512-QP88BAKvMam/3NxH6vj2o21R6MjxZUAd6nlwAS/pnGvN9IVLocLHxGYIzFhg6fUQ+5th6P4dv4eW9jX3DSIj7A==}
     engines: {node: '>=12'}
 
+  pkce-challenge@5.0.1:
+    resolution: {integrity: sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==}
+    engines: {node: '>=16.20.0'}
+
   pkg-dir@4.2.0:
     resolution: {integrity: sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==}
     engines: {node: '>=8'}
@@ -8702,6 +8884,10 @@ packages:
     resolution: {integrity: sha512-s4VSOf6yN0rvbRZGxs8Om5CWj6seneMwK3oDb4lWDH0UPhWcxwOWw5+qk24bxq87szX1ydrwylIOp2uG1ojUpA==}
     engines: {node: '>= 0.8'}
 
+  raw-body@3.0.2:
+    resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==}
+    engines: {node: '>= 0.10'}
+
   rc9@3.0.1:
     resolution: {integrity: sha512-gMDyleLWVE+i6Sgtc0QbbY6pEKqYs97NGi6isHQPqYlLemPoO8dxQ3uGi0f4NiP98c+jMW6cG1Kx9dDwfvqARQ==}
 
@@ -8865,6 +9051,10 @@ packages:
     engines: {node: '>=18.0.0', npm: '>=8.0.0'}
     hasBin: true
 
+  router@2.2.0:
+    resolution: {integrity: sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==}
+    engines: {node: '>= 18'}
+
   safe-buffer@5.1.2:
     resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==}
 
@@ -8911,10 +9101,18 @@ packages:
     resolution: {integrity: sha512-VMbMxbDeehAxpOtWJXlcUS5E8iXh6QmN+BkRX1GARS3wRaXEEgzCcB10gTQazO42tpNIya8xIyNx8fll1OFPrg==}
     engines: {node: '>= 0.8.0'}
 
+  send@1.2.1:
+    resolution: {integrity: sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==}
+    engines: {node: '>= 18'}
+
   serve-static@1.16.3:
     resolution: {integrity: sha512-x0RTqQel6g5SY7Lg6ZreMmsOzncHFU7nhnRWkKgWuMTu5NN0DR5oruckMqRvacAN9d5w6ARnRBXl9xhDCgfMeA==}
     engines: {node: '>= 0.8.0'}
 
+  serve-static@2.2.1:
+    resolution: {integrity: sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==}
+    engines: {node: '>= 18'}
+
   set-blocking@2.0.0:
     resolution: {integrity: sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==}
 
@@ -9041,6 +9239,9 @@ packages:
   stackback@0.0.2:
     resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
 
+  standardwebhooks@1.0.0:
+    resolution: {integrity: sha512-BbHGOQK9olHPMvQNHWul6MYlrRTAOKn03rOe4A8O3CLWhNf4YHBqq2HJKKC+sfqpxiBY52pNeesD6jIiLDz8jg==}
+
   statuses@2.0.2:
     resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
     engines: {node: '>= 0.8'}
@@ -9201,6 +9402,9 @@ packages:
     resolution: {integrity: sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==}
     hasBin: true
 
+  ts-algebra@2.0.0:
+    resolution: {integrity: sha512-FPAhNPFMrkwz76P7cdjdmiShwMynZYN6SgOujD1urY4oNm80Ou9oMdmbR45LotcKOXoy7wSmHkRFE6Mxbrhefw==}
+
   ts-toolbelt@9.6.0:
     resolution: {integrity: sha512-nsZd8ZeNUzukXPlJmTBwUAuABDe/9qtVDelJeT/qW0ow3ZS3BsQJtNkan1802aM9Uf68/Y8ljw86Hu0h5IUW3w==}
 
@@ -9278,6 +9482,10 @@ packages:
     resolution: {integrity: sha512-TkRKr9sUTxEH8MdfuCSP7VizJyzRNMjj2J2do2Jr3Kym598JVdEksuzPQCnlFPW4ky9Q+iA+ma9BGm06XQBy8g==}
     engines: {node: '>= 0.6'}
 
+  type-is@2.1.0:
+    resolution: {integrity: sha512-faYHw0anBbc/kWF3zFTEnxSFOAGUX9GFbOBthvDdLsIlEoWOFOtS0zgCiQYwIskL9iGXZL3kAXD8OoZ4GmMATA==}
+    engines: {node: '>= 18'}
+
   typescript@5.9.3:
     resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
     engines: {node: '>=14.17'}
@@ -9730,6 +9938,11 @@ packages:
   zeptomatch@2.1.0:
     resolution: {integrity: sha512-KiGErG2J0G82LSpniV0CtIzjlJ10E04j02VOudJsPyPwNZgGnRKQy7I1R7GMyg/QswnE4l7ohSGrQbQbjXPPDA==}
 
+  zod-to-json-schema@3.25.2:
+    resolution: {integrity: sha512-O/PgfnpT1xKSDeQYSCfRI5Gy3hPf91mKVDuYLUHZJMiDFptvP41MSnWofm8dnCm0256ZNfZIM7DSzuSMAFnjHA==}
+    peerDependencies:
+      zod: ^3.25.28 || ^4
+
   zod@3.25.76:
     resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==}
 
@@ -9739,6 +9952,52 @@ snapshots:
 
   '@alloc/quick-lru@5.2.0': {}
 
+  '@anthropic-ai/claude-agent-sdk-darwin-arm64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-darwin-x64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-linux-arm64-musl@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-linux-arm64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-linux-x64-musl@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-linux-x64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-win32-arm64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk-win32-x64@0.3.158':
+    optional: true
+
+  '@anthropic-ai/claude-agent-sdk@0.3.158(@anthropic-ai/sdk@0.100.1(zod@3.25.76))(@modelcontextprotocol/sdk@1.29.0(zod@3.25.76))(zod@3.25.76)':
+    dependencies:
+      '@anthropic-ai/sdk': 0.100.1(zod@3.25.76)
+      '@modelcontextprotocol/sdk': 1.29.0(zod@3.25.76)
+      zod: 3.25.76
+    optionalDependencies:
+      '@anthropic-ai/claude-agent-sdk-darwin-arm64': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-darwin-x64': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-linux-arm64': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-linux-arm64-musl': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-linux-x64': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-linux-x64-musl': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-win32-arm64': 0.3.158
+      '@anthropic-ai/claude-agent-sdk-win32-x64': 0.3.158
+
+  '@anthropic-ai/sdk@0.100.1(zod@3.25.76)':
+    dependencies:
+      json-schema-to-ts: 3.1.1
+      standardwebhooks: 1.0.0
+    optionalDependencies:
+      zod: 3.25.76
+
   '@ark/schema@0.56.0':
     dependencies:
       '@ark/util': 0.56.0
@@ -10593,6 +10852,28 @@ snapshots:
 
   '@mjackson/node-fetch-server@0.2.0': {}
 
+  '@modelcontextprotocol/sdk@1.29.0(zod@3.25.76)':
+    dependencies:
+      '@hono/node-server': 1.19.11(hono@4.11.4)
+      ajv: 8.20.0
+      ajv-formats: 3.0.1(ajv@8.20.0)
+      content-type: 1.0.5
+      cors: 2.8.6
+      cross-spawn: 7.0.6
+      eventsource: 3.0.7
+      eventsource-parser: 3.1.0
+      express: 5.2.1
+      express-rate-limit: 8.5.2(express@5.2.1)
+      hono: 4.11.4
+      jose: 6.2.3
+      json-schema-typed: 8.0.2
+      pkce-challenge: 5.0.1
+      raw-body: 3.0.2
+      zod: 3.25.76
+      zod-to-json-schema: 3.25.2(zod@3.25.76)
+    transitivePeerDependencies:
+      - supports-color
+
   '@mongodb-js/saslprep@1.4.11':
     dependencies:
       sparse-bitfield: 3.0.3
@@ -11338,6 +11619,8 @@ snapshots:
 
   '@speed-highlight/core@1.2.15': {}
 
+  '@stablelib/base64@1.0.1': {}
+
   '@standard-schema/spec@1.1.0': {}
 
   '@statsig/client-core@3.31.0': {}
@@ -11672,6 +11955,11 @@ snapshots:
       mime-types: 2.1.35
       negotiator: 0.6.3
 
+  accepts@2.0.0:
+    dependencies:
+      mime-types: 3.0.2
+      negotiator: 1.0.0
+
   acorn-jsx-walk@2.0.0: {}
 
   acorn-jsx@5.3.2(acorn@8.16.0):
@@ -11708,6 +11996,10 @@ snapshots:
       indent-string: 4.0.0
     optional: true
 
+  ajv-formats@3.0.1(ajv@8.20.0):
+    optionalDependencies:
+      ajv: 8.20.0
+
   ajv@8.20.0:
     dependencies:
       fast-deep-equal: 3.1.3
@@ -11879,6 +12171,20 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  body-parser@2.2.2:
+    dependencies:
+      bytes: 3.1.2
+      content-type: 1.0.5
+      debug: 4.4.3
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      on-finished: 2.4.1
+      qs: 6.15.1
+      raw-body: 3.0.2
+      type-is: 2.1.0
+    transitivePeerDependencies:
+      - supports-color
+
   brace-expansion@1.1.15:
     dependencies:
       balanced-match: 1.0.2
@@ -12072,16 +12378,27 @@ snapshots:
     dependencies:
       safe-buffer: 5.2.1
 
+  content-disposition@1.1.0: {}
+
   content-type@1.0.5: {}
 
+  content-type@2.0.0: {}
+
   convert-source-map@2.0.0: {}
 
   cookie-signature@1.0.7: {}
 
+  cookie-signature@1.2.2: {}
+
   cookie@0.7.2: {}
 
   cookie@1.1.1: {}
 
+  cors@2.8.6:
+    dependencies:
+      object-assign: 4.1.1
+      vary: 1.1.2
+
   cross-spawn@7.0.6:
     dependencies:
       path-key: 3.1.1
@@ -12347,6 +12664,12 @@ snapshots:
     transitivePeerDependencies:
       - bare-abort-controller
 
+  eventsource-parser@3.1.0: {}
+
+  eventsource@3.0.7:
+    dependencies:
+      eventsource-parser: 3.1.0
+
   evlog@1.9.0: {}
 
   exit-hook@2.2.1: {}
@@ -12355,6 +12678,11 @@ snapshots:
 
   expect-type@1.3.0: {}
 
+  express-rate-limit@8.5.2(express@5.2.1):
+    dependencies:
+      express: 5.2.1
+      ip-address: 10.2.0
+
   express@4.22.1:
     dependencies:
       accepts: 1.3.8
@@ -12391,6 +12719,39 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  express@5.2.1:
+    dependencies:
+      accepts: 2.0.0
+      body-parser: 2.2.2
+      content-disposition: 1.1.0
+      content-type: 1.0.5
+      cookie: 0.7.2
+      cookie-signature: 1.2.2
+      debug: 4.4.3
+      depd: 2.0.0
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      finalhandler: 2.1.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      merge-descriptors: 2.0.0
+      mime-types: 3.0.2
+      on-finished: 2.4.1
+      once: 1.4.0
+      parseurl: 1.3.3
+      proxy-addr: 2.0.7
+      qs: 6.15.1
+      range-parser: 1.2.1
+      router: 2.2.0
+      send: 1.2.1
+      serve-static: 2.2.1
+      statuses: 2.0.2
+      type-is: 2.1.0
+      vary: 1.1.2
+    transitivePeerDependencies:
+      - supports-color
+
   exsolve@1.0.8: {}
 
   extend-shallow@2.0.1:
@@ -12401,6 +12762,8 @@ snapshots:
 
   fast-fifo@1.3.2: {}
 
+  fast-sha256@1.3.0: {}
+
   fast-string-truncated-width@3.0.3: {}
 
   fast-string-width@3.0.2:
@@ -12431,6 +12794,17 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  finalhandler@2.1.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      on-finished: 2.4.1
+      parseurl: 1.3.3
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+
   find-cache-dir@3.3.2:
     dependencies:
       commondir: 1.0.1
@@ -12455,6 +12829,8 @@ snapshots:
 
   fresh@0.5.2: {}
 
+  fresh@2.0.0: {}
+
   fs-constants@1.0.0: {}
 
   fs-minipass@2.1.0:
@@ -12630,6 +13006,10 @@ snapshots:
       safer-buffer: 2.1.2
     optional: true
 
+  iconv-lite@0.7.2:
+    dependencies:
+      safer-buffer: 2.1.2
+
   ieee754@1.2.1: {}
 
   ignore@7.0.5: {}
@@ -12660,8 +13040,7 @@ snapshots:
 
   interpret@3.1.1: {}
 
-  ip-address@10.2.0:
-    optional: true
+  ip-address@10.2.0: {}
 
   ipaddr.js@1.9.1: {}
 
@@ -12690,6 +13069,8 @@ snapshots:
 
   is-potential-custom-element-name@1.0.1: {}
 
+  is-promise@4.0.0: {}
+
   isarray@2.0.5: {}
 
   isbot@5.1.40: {}
@@ -12711,6 +13092,8 @@ snapshots:
 
   jiti@2.7.0: {}
 
+  jose@6.2.3: {}
+
   js-tokens@10.0.0: {}
 
   js-tokens@4.0.0: {}
@@ -12750,8 +13133,15 @@ snapshots:
 
   jsesc@3.1.0: {}
 
+  json-schema-to-ts@3.1.1:
+    dependencies:
+      '@babel/runtime': 7.29.2
+      ts-algebra: 2.0.0
+
   json-schema-traverse@1.0.0: {}
 
+  json-schema-typed@8.0.2: {}
+
   json-stable-stringify@1.3.0:
     dependencies:
       call-bind: 1.0.9
@@ -12920,10 +13310,14 @@ snapshots:
 
   media-typer@0.3.0: {}
 
+  media-typer@1.1.0: {}
+
   memory-pager@1.5.0: {}
 
   merge-descriptors@1.0.3: {}
 
+  merge-descriptors@2.0.0: {}
+
   methods@1.1.2: {}
 
   mime-db@1.52.0: {}
@@ -12934,6 +13328,10 @@ snapshots:
     dependencies:
       mime-db: 1.52.0
 
+  mime-types@3.0.2:
+    dependencies:
+      mime-db: 1.54.0
+
   mime@1.6.0: {}
 
   mimic-function@5.0.1: {}
@@ -13111,6 +13509,8 @@ snapshots:
 
   negotiator@0.6.4: {}
 
+  negotiator@1.0.0: {}
+
   new-find-package-json@2.0.0:
     dependencies:
       debug: 4.4.3
@@ -13179,6 +13579,8 @@ snapshots:
       set-blocking: 2.0.0
     optional: true
 
+  object-assign@4.1.1: {}
+
   object-hash@2.2.0: {}
 
   object-inspect@1.13.4: {}
@@ -13251,6 +13653,8 @@ snapshots:
 
   path-to-regexp@6.3.0: {}
 
+  path-to-regexp@8.4.2: {}
+
   pathe@1.1.2: {}
 
   pathe@2.0.3: {}
@@ -13317,6 +13721,8 @@ snapshots:
 
   picomatch@4.0.4: {}
 
+  pkce-challenge@5.0.1: {}
+
   pkg-dir@4.2.0:
     dependencies:
       find-up: 4.1.0
@@ -13438,6 +13844,13 @@ snapshots:
       iconv-lite: 0.4.24
       unpipe: 1.0.0
 
+  raw-body@3.0.2:
+    dependencies:
+      bytes: 3.1.2
+      http-errors: 2.0.1
+      iconv-lite: 0.7.2
+      unpipe: 1.0.0
+
   rc9@3.0.1:
     dependencies:
       defu: 6.1.7
@@ -13642,6 +14055,16 @@ snapshots:
       '@rollup/rollup-win32-x64-msvc': 4.59.0
       fsevents: 2.3.3
 
+  router@2.2.0:
+    dependencies:
+      debug: 4.4.3
+      depd: 2.0.0
+      is-promise: 4.0.0
+      parseurl: 1.3.3
+      path-to-regexp: 8.4.2
+    transitivePeerDependencies:
+      - supports-color
+
   safe-buffer@5.1.2: {}
 
   safe-buffer@5.2.1: {}
@@ -13689,6 +14112,22 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  send@1.2.1:
+    dependencies:
+      debug: 4.4.3
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      etag: 1.8.1
+      fresh: 2.0.0
+      http-errors: 2.0.1
+      mime-types: 3.0.2
+      ms: 2.1.3
+      on-finished: 2.4.1
+      range-parser: 1.2.1
+      statuses: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+
   serve-static@1.16.3:
     dependencies:
       encodeurl: 2.0.0
@@ -13698,6 +14137,15 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  serve-static@2.2.1:
+    dependencies:
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      parseurl: 1.3.3
+      send: 1.2.1
+    transitivePeerDependencies:
+      - supports-color
+
   set-blocking@2.0.0:
     optional: true
 
@@ -13873,6 +14321,11 @@ snapshots:
 
   stackback@0.0.2: {}
 
+  standardwebhooks@1.0.0:
+    dependencies:
+      '@stablelib/base64': 1.0.1
+      fast-sha256: 1.3.0
+
   statuses@2.0.2: {}
 
   std-env@3.10.0: {}
@@ -14035,6 +14488,8 @@ snapshots:
 
   tree-kill@1.2.2: {}
 
+  ts-algebra@2.0.0: {}
+
   ts-toolbelt@9.6.0: {}
 
   tsconfck@3.1.6(typescript@5.9.3):
@@ -14108,6 +14563,12 @@ snapshots:
       media-typer: 0.3.0
       mime-types: 2.1.35
 
+  type-is@2.1.0:
+    dependencies:
+      content-type: 2.0.0
+      media-typer: 1.1.0
+      mime-types: 3.0.2
+
   typescript@5.9.3: {}
 
   unconfig-core@7.5.0:
@@ -14488,4 +14949,8 @@ snapshots:
       grammex: 3.1.12
       graphmatch: 1.1.1
 
+  zod-to-json-schema@3.25.2(zod@3.25.76):
+    dependencies:
+      zod: 3.25.76
+
   zod@3.25.76: {}
diff --git a/projects/drive-judge-harness/slices/claude-runtime/plan.md b/projects/drive-judge-harness/slices/claude-runtime/plan.md
new file mode 100644
index 0000000000..c3ee0bf977
--- /dev/null
+++ b/projects/drive-judge-harness/slices/claude-runtime/plan.md
@@ -0,0 +1,29 @@
+# Plan: claude-runtime (TML-2759)
+
+Test-first. The Claude SDK is reached only via `claude-adapter.ts`'s lazy import (mirroring `sdk-adapter.ts`); all mapping logic lives in the no-SDK `claude-events.ts` so it's unit-testable with the SDK absent. Built on branch `tml-2757-run-fidelity` (PR #657), on top of the run-fidelity commits.
+
+## Dispatches
+
+### D1 — `claude-events.ts`: pure mappers + extraction (test-first)
+- **Outcome:** Claude message/result shapes map to the harness's `RunStreamEvent` + a rich outcome, with no SDK import.
+- Implement `usageFromAssistant`, `streamEventFromMessage`, `outcomeFromResult` (→ `{status,runId,tokens,durationMs,costUsd,numTurns}`) over `unknown`. Map `cache_creation_input_tokens`→`cacheWriteTokens`, `cache_read_input_tokens`→`cacheReadTokens`; `session_id`→`runId`; `subtype==='success'`→`finished`.
+- Tests (`test/claude-events.test.ts`): real `SDKResultMessage` (success + an `error_*` subtype) + a real `assistant` message; assert token totals, `cost_usd`, `wall_clock_ms` (`duration_ms`), `num_turns`, `run_id`; degrade on non-records. SDK not installed.
+- **Builds on:** run-fidelity (`usage.ts`, the seam). **Hands to:** D2.
+
+### D2 — `claude-adapter.ts` + seam/manifest + runtime selection (test-first)
+- **Outcome:** the harness runs on Claude by default and records tokens/cost/turns; `--runtime cursor` still works.
+- `RunOutcome` gains `tokens`/`costUsd`/`numTurns` (Cursor adapter sets null). `run-one-brief.ts`: prefer `outcome.tokens` else `accumulateUsage`; populate `cost_usd`/`num_turns`/`wall_clock_ms`; runtime selection + per-runtime key gating; `defaultCreateAgent(runtime)`. `manifest.ts`: add `runtime`/`cost_usd`/`num_turns`. `run-arm.ts` + `run-one-brief.ts` CLIs: `--runtime` (default claude), `--max-budget-usd`.
+- `claude-adapter.ts`: `query()` with `cwd`/`settingSources:['project']`/`skills:'all'`/`permissionMode:'bypassPermissions'`/`allowDangerouslySkipPermissions:true`/`model`/`maxBudgetUsd`; buffer the result for `wait()`.
+- Tests: injected `createAgent` returning a Claude-shaped outcome → manifest has `runtime:'claude'`, non-null `tokens`/`cost_usd`/`num_turns`; a `--runtime cursor` selection test; key-gating per runtime.
+- **Builds on:** D1. **Hands to:** D3 (orchestrator).
+
+### D3 — install + docs + live smoke + gates + PR (orchestrator)
+- Install `@anthropic-ai/claude-agent-sdk` (`pnpm add -w -D`); handle any build-script/native hiccups as with `@cursor/sdk`.
+- Wire `test/claude-events.test.ts` into `test:scripts`.
+- Docs: SKILL.md "Runtimes" section (claude default / cursor secondary, selection, `maxBudgetUsd`); scope the token-gap note to the Cursor adapter in SKILL.md + KNOWN-ISSUES.
+- Live smoke on `claude-haiku-4-5` iff `ANTHROPIC_API_KEY` is present (otherwise record a gated follow-up note).
+- Gates: `pnpm test:scripts`, biome, transient-id scan. Update PR #657 title/body to "faithful + decoupled runs" (refs TML-2757 + TML-2759). Commit signed-off, push.
+- **Builds on:** D2.
+
+## Sequencing
+Serial: D1 (mappers) → D2 (adapter + wiring consume them) → D3 (install/docs/gates). Target 3 dispatches; D1+D2 delegated to one implementer, D3 by the orchestrator.
diff --git a/projects/drive-judge-harness/slices/claude-runtime/spec.md b/projects/drive-judge-harness/slices/claude-runtime/spec.md
new file mode 100644
index 0000000000..4d828c3b7e
--- /dev/null
+++ b/projects/drive-judge-harness/slices/claude-runtime/spec.md
@@ -0,0 +1,88 @@
+# Slice: claude-runtime
+
+_Parent project `projects/drive-judge-harness/`. Outcome this slice contributes: the harness is **decoupled from Cursor** — it runs the Drive orchestrator on Anthropic's Claude Agent SDK by default, which reports real token usage, USD cost, and wall-clock natively (the token and cost signal `@cursor/sdk`'s local runtime never gave us; it reported wall-clock only). The Cursor adapter stays as a runtime-selectable secondary. Delivered alongside the run-fidelity fixes on the same branch/PR (#657)._
+
+## At a glance
+
+A live run now records tokens + dollars + wall-clock, because the runtime reports them:
+
+```jsonc
+{ "runtime": "claude", "model": "claude-haiku-4-5", "status": "finished",
+  "run_id": "<session-id>", "agent_id": null,
+  "tokens": { "inputTokens": 33, "outputTokens": 904, "cacheReadTokens": 230827, "cacheWriteTokens": 53995, "totalTokens": 285759 },
+  "cost_usd": 0.1839242, "num_turns": 9, "wall_clock_ms": 16025, "notes": [] }
+```
+
+The Cursor runtime stays available via `--runtime cursor`; its token gap (documented in the run-fidelity work) is now scoped to that adapter.
+
+## Chosen design
+
+The Cursor coupling lives in exactly one module behind a seam that already exists: `run-one-brief.ts` defines `CreateAgent` / `OrchestratorRun` / `RunOutcome`; `sdk-adapter.ts` is the only `@cursor/sdk` importer. This slice adds a **second adapter** over the same seam.
+
+Ground-truth Claude Agent SDK shapes (`@anthropic-ai/claude-agent-sdk`, confirmed from the cost-tracking + TS-reference docs):
+- `query({ prompt, options })` returns an async iterable of messages.
+- Per-`assistant` message: nested `message.usage` (`input_tokens`, `output_tokens`, `cache_creation_input_tokens`, `cache_read_input_tokens`) + `message.id`.
+- Terminal `result` message (`SDKResultMessage`): `subtype` (`success` | `error_*`), cumulative `usage` (same fields), `total_cost_usd`, `duration_ms`, `num_turns`, `session_id`, `result`.
+
+### 1. `claude-events.ts` — pure mappers (no SDK import)
+
+Mirror of `sdk-events.ts`, for the Claude shapes. Operates over `unknown`; imports nothing from the SDK so it's unit-testable with the SDK absent. Exports:
+- `usageFromAssistant(msg) -> TurnUsage | null` — maps `message.usage` (`cache_creation_input_tokens`→`cacheWriteTokens`, `cache_read_input_tokens`→`cacheReadTokens`).
+- `streamEventFromMessage(msg) -> RunStreamEvent` — `assistant` with usage → `turn-ended`; else `other`.
+- `outcomeFromResult(msg) -> { status; runId; tokens; durationMs; costUsd; numTurns } | null` — only for `type: 'result'`. `subtype === 'success'` → `finished`, else `error`; `session_id` → `runId`; cumulative `usage` → `TokenTotals`; `total_cost_usd` → `costUsd`; `duration_ms` → `durationMs`; `num_turns` → `numTurns`. Degrades on non-records.
+
+### 2. `claude-adapter.ts` — the only Claude-SDK importer (lazy)
+
+Implements `CreateAgent` over `query()`. Because `query()` is one generator (not split stream/wait), the adapter iterates it inside `stream()`, yields `turn-ended` events from per-assistant usage, captures the terminal `result` message, and returns it from `wait()` (run-one-brief drains the stream before calling `wait()`, so the result is available). `query()` options for an **unattended, skill-aware** orchestrator run:
+- `cwd: runDir` (the prepared checkout — its `.claude/skills/` are the injected bundle)
+- `settingSources: ['project']` (loads `.claude/skills/`, `.claude/agents/`, `CLAUDE.md` from the checkout)
+- `skills: 'all'` (auto-enables the `Skill` tool)
+- `permissionMode: 'bypassPermissions'` + `allowDangerouslySkipPermissions: true` (no interactive prompts)
+- `model` (the pinned model id)
+- `maxBudgetUsd` when provided (hard per-run dollar cap — aborts with `error_max_budget_usd`)
+
+### 3. Seam + manifest extensions
+
+- `RunOutcome` gains `tokens: TokenTotals | null`, `costUsd: number | null`, `numTurns: number | null` (Cursor adapter sets these `null`; tokens still flow via per-turn accumulation there).
+- `run-one-brief.ts`: prefer `outcome.tokens` when present, else fall back to `accumulateUsage(usageUpdates)`. Populate `cost_usd` / `num_turns` / `wall_clock_ms` from the outcome. The null-token note (from the run-fidelity work) fires only when tokens are genuinely null.
+- `RunManifest` gains `runtime: 'claude' | 'cursor'`, `cost_usd: number | null`, `num_turns: number | null`.
+- **Runtime selection:** `RunOneBriefConfig`/`RunArmConfig` gain `runtime: 'claude' | 'cursor'` (default `'claude'`) and optional `maxBudgetUsd`. `defaultCreateAgent(runtime)` lazily imports the matching adapter. The gate's `apiKeyPresent` is computed against the runtime's key (`ANTHROPIC_API_KEY` for claude, `CURSOR_API_KEY` for cursor). CLI gains `--runtime <claude|cursor>` (default claude) and `--max-budget-usd <n>`.
+
+## Coherence rationale
+
+One reviewer holds it in one sitting: a second adapter behind an existing seam, plus the manifest fields the new runtime can finally populate. It's entangled with the run-fidelity work on the same branch — both are "make the recorded run faithful," and this slice is what turns the token gap that work documented into a captured signal. Rolls forward as: new pure module + new adapter + additive outcome/manifest fields + a runtime selector. No production package touched.
+
+## Scope
+
+**In:** `claude-events.ts` (+ tests with real result/assistant fixtures); `claude-adapter.ts` (lazy, sole Claude-SDK importer); `RunOutcome`/`RunManifest` additions; runtime selection + key-gating + CLI flags in `run-one-brief.ts` and `run-arm.ts`; install `@anthropic-ai/claude-agent-sdk`; SKILL.md runtimes section + scope the token-gap doc to the Cursor adapter; new test wired into `test:scripts`. Delivered on branch `tml-2757-run-fidelity` / PR #657.
+
+**Out:** removing the Cursor adapter (kept as secondary, operator decision). The A/B loop / aggregation / CI gate (TML-2737). Judge calibration (TML-2736) and corpus generation (real-dollar, operator-gated).
+
+## Pre-investigated edge cases
+
+| Edge case | Disposition | Notes |
+|---|---|---|
+| `query()` is one generator, not stream+wait | Drove the adapter shape | Iterate in `stream()`, stash the `result` message for `wait()`. |
+| Claude reports cumulative usage on `result`, not just per-turn | `RunOutcome.tokens` | run-one-brief prefers outcome tokens; per-turn accumulation stays the Cursor path. |
+| No `agent_id` concept in Claude SDK | `agent_id: null`, `session_id`→`run_id` | The session id is the run identifier. |
+| Unattended run hitting a permission prompt | `bypassPermissions` + `allowDangerouslySkipPermissions` | Required for autonomous orchestrator runs. |
+| Runaway cost during calibration | `maxBudgetUsd` cap | Aborts with `error_max_budget_usd`; recorded as an error run with usage-so-far. |
+| `@anthropic-ai/claude-agent-sdk` not installed at test time | Lazy import behind the gate | `claude-events.ts` has no SDK import; tests never load the adapter. |
+
+## Slice-specific done conditions
+
+- [ ] A test feeds a real `SDKResultMessage` (success + an `error_*` subtype) through `claude-events.ts` and asserts `tokens`, `cost_usd`, `wall_clock_ms`, `num_turns`, `run_id` extraction — with the SDK not installed.
+- [ ] `--runtime cursor` still produces a Cursor-runtime manifest (selection works both ways).
+- [ ] A live smoke run on `claude-haiku-4-5` records non-null `tokens` + `cost_usd` **iff** `ANTHROPIC_API_KEY` is present; otherwise this is a gated follow-up.
+
+## Open Questions
+
+1. **Subagent token attribution.** Claude's `usage` aggregates orchestrator + subagents into one run total (per-subagent breakdown is an open SDK request). Working position: the run total is exactly what we want for the efficiency metric; per-subagent attribution is not needed for this slice.
+
+## References
+
+- Parent: `projects/drive-judge-harness/spec.md`; sibling run-fidelity slice (same branch).
+- Spike: `projects/drive-judge-harness/spikes/2026-05-31-sdk-token-usage-retrieval.md`.
+- Linear: [TML-2759](https://linear.app/prisma-company/issue/TML-2759) (related TML-2757, blocks TML-2737).
+- SDK docs: [cost-tracking](https://code.claude.com/docs/en/agent-sdk/cost-tracking), [TS reference](https://code.claude.com/docs/en/agent-sdk/typescript), [skills](https://code.claude.com/docs/en/agent-sdk/skills).
+- Seam: `skills-contrib/drive-judge-harness/{run-one-brief,sdk-adapter,sdk-events,run-arm,manifest,usage}.ts`.
diff --git a/projects/drive-judge-harness/slices/run-fidelity/plan.md b/projects/drive-judge-harness/slices/run-fidelity/plan.md
new file mode 100644
index 0000000000..5689ab341c
--- /dev/null
+++ b/projects/drive-judge-harness/slices/run-fidelity/plan.md
@@ -0,0 +1,37 @@
+# Plan: run-fidelity (TML-2757)
+
+Test-first throughout. The live SDK is reached only via `sdk-adapter.ts`'s dynamic import; all new logic lives in no-SDK-import modules so it's unit-testable with `@cursor/sdk` absent. Spike `2026-05-31-sdk-token-usage-retrieval.md` is committed in dispatch 1.
+
+## Dispatches
+
+### D1 — `sdk-events.ts`: pure mappers + real-shape extraction (test-first)
+- **Outcome:** message/outcome mapping lives in a no-SDK module, with `agent_id` and `durationMs` extracted from the **real captured shapes**.
+- Move `extractText` / `toStreamEvent` / `adaptOutcome` (and the now-dead `extractUsage`) out of `sdk-adapter.ts` into new `sdk-events.ts` (imports nothing from the SDK; operates over `unknown`). Add `agentIdFromMessage`, `outcomeFromResult` (→ `{status,runId,durationMs}`), `streamEventFromMessage`.
+- Tests (`test/sdk-events.test.ts`): feed the real `status`/`assistant`/outcome fixtures from the spike; assert `agent_id`, `durationMs`, stream mapping. Runs with the SDK uninstalled.
+- `sdk-adapter.ts` imports the mappers (no behaviour change).
+- Commit the spike artifact here.
+- **Builds on:** merged run-setup. **Hands to:** D2.
+
+### D2 — capture agent_id + wall-clock end-to-end (test-first)
+- **Outcome:** a finished run records the real `agent_id` and `wall_clock_ms`.
+- `run-one-brief.ts`: `RunOutcome` gains `durationMs: number | null`; adapter captures `agent_id` from the first stream message carrying one and returns it from `wait()`.
+- `manifest.ts`: add `wall_clock_ms`; add the token-unavailable note when `tokens` is null on a finished live run. `run-arm.ts` threads `wall_clock_ms` into the enriched manifest.
+- Tests: outcome→manifest mapping populates `agent_id` + `wall_clock_ms`; null-token note present.
+- **Builds on:** D1. **Hands to:** D3.
+
+### D3 — `collect-run` run-scoping (test-first)
+- **Outcome:** `collectRun` returns only traces emitted during the run.
+- `prepare-run.ts`: snapshot `*.jsonl` under `runDir` after the baseline commit → `PreparedRun.preexistingTracePaths`.
+- `collect-run.ts`: exclude `preexistingTracePaths`; `agent_id` match over the remainder.
+- Tests: baseline-committed trace + run-emitted trace → only the latter returned (cover a gitignored-path trace).
+- **Builds on:** D2. **Hands to:** D4.
+
+### D4 — docs + gates + PR
+- **Outcome:** token gap documented; suite green; PR open.
+- SKILL.md / KNOWN-ISSUES: token gap (link spike) + wall-clock-as-primary note.
+- Wire new tests into `test:scripts`; run `pnpm -w typecheck`, `pnpm -w lint`, `pnpm -w test:scripts`; fix fallout.
+- Stage explicitly, sign off, push to `tml-2757-run-fidelity`, open PR (create-pr skill).
+- **Builds on:** D3.
+
+## Sequencing
+Serial: D1 unlocks testability; D2 consumes the extractors; D3's scoping change is separable from D2's code, but its `agent_id` matching relies on D2's capture and it shares the manifest touch, so it is sequenced after D2 to avoid conflict; D4 closes. Target 4 dispatches.
diff --git a/projects/drive-judge-harness/slices/run-fidelity/spec.md b/projects/drive-judge-harness/slices/run-fidelity/spec.md
new file mode 100644
index 0000000000..d740089f02
--- /dev/null
+++ b/projects/drive-judge-harness/slices/run-fidelity/spec.md
@@ -0,0 +1,76 @@
+# Slice: run-fidelity
+
+_Parent project `projects/drive-judge-harness/`. Outcome this slice contributes: the harness records a **faithful** run — correct `agent_id`, a real wall-clock signal, and a trace set scoped to what the run actually emitted — so the corpus the judge calibrates against and the A/B engine ranks on isn't polluted or blank. Fixes the three fidelity defects the first live `run-arm` exposed._
+
+## At a glance
+
+The first live run (composer-2.5, i12-halt) proved the pipeline but mis-recorded the run: `agent_id: null`, `tokens` all-zero, and `collected_trace_paths` containing 5 pre-existing committed traces from the base checkout plus 1 real one. This slice fixes the recordable defects and honestly documents the one that isn't recordable:
+
+- **`agent_id`** is read from the stream `status` message (where the local runtime actually puts it), not the `wait()` outcome.
+- **Wall-clock** (`durationMs` from the outcome) is captured as `wall_clock_ms` — the primary Tier-2 efficiency metric, since tokens are unavailable.
+- **`collect-run`** returns only traces *emitted during the run*, not every schema-valid `.jsonl` in the checkout.
+- **Tokens** stay `null` for local runs with an explicit note + documented SDK limitation (spike `2026-05-31-sdk-token-usage-retrieval.md`).
+
+## Chosen design
+
+Ground-truth shapes from the spike probe (`@cursor/sdk@1.0.15`, local runtime):
+- stream `status` → `{ type:"status", agent_id, run_id, status }`
+- stream `assistant` → `{ type:"assistant", agent_id, run_id, message }`
+- outcome (`wait()`) → `{ id, status, result, model, durationMs }` (no `agent_id`, no tokens)
+
+### 1. `sdk-events.ts` — extract the pure mappers (no SDK import)
+
+Today the message/outcome mappers (`extractUsage`, `extractText`, `adaptOutcome`, `toStreamEvent`) live inside `sdk-adapter.ts`, which `import`s `@cursor/sdk` at module top — so they can't be unit-tested without the SDK installed. Move them into a new **`sdk-events.ts`** that imports nothing from the SDK and operates over `unknown`. `sdk-adapter.ts` imports them. This is what lets the fixes be test-first while preserving the live-execution gate (SDK reached only via `sdk-adapter.ts`'s dynamic import).
+
+`sdk-events.ts` exports pure functions, unit-tested against the **real captured shapes**:
+- `streamEventFromMessage(msg) -> RunStreamEvent` — maps `status`/`assistant` (real shapes) and keeps the `turn-ended` branch for the cloud runtime (still valid if ever used).
+- `agentIdFromMessage(msg) -> string | null` — reads snake_case `agent_id`.
+- `outcomeFromResult(raw) -> { status, runId, durationMs }` — reads `id`→runId, `status`, `durationMs` (number|null).
+
+### 2. `run-one-brief.ts` — capture agent_id + wall-clock
+
+`RunOutcome` gains `durationMs: number | null`. The adapter captures `agent_id` from the **first stream message that carries one** (run-one-brief drains the stream before `wait()`, so it's available), and `wait()` returns it as `agentId`. `durationMs` flows from `outcomeFromResult`. No behaviour change to the dry-run/gate paths.
+
+### 3. `manifest.ts` — wall-clock + honest token note
+
+Add `wall_clock_ms: number | null` (from `durationMs`). When `tokens` is `null` on a *finished live* run, append a note: `"tokens unavailable: @cursor/sdk local runtime emits no usage events (see spike 2026-05-31)"`. `tokens` field stays (null for local).
+
+### 4. `collect-run.ts` — scope to run-emitted traces
+
+`PreparedRun` gains `preexistingTracePaths: string[]` — the set of `*.jsonl` present under `runDir` immediately after `prepareRun`'s baseline commit (the base checkout's committed traces). `collectRun` excludes that set, so `tracePaths` contains only traces the run produced. This is deterministic (no mtime/clock reliance) and robust to gitignored trace locations (e.g. `wip/drive-trace/`, where the real trace landed). `agent_id` matching then runs over the run-emitted set only.
+
+## Coherence rationale
+
+One reviewer holds it in one sitting: every change serves "record the run faithfully," and they're entangled — the `agent_id` fix is what makes `collect-run`'s matching work, the mapper extraction is what makes both testable, and the wall-clock capture is the efficiency metric that stands in for the tokens the SDK won't give us. Rolls back as one unit (one new pure module + additive manifest/outcome fields + a `collect-run` scoping change). Touches no production package.
+
+## Scope
+
+**In:** new `sdk-events.ts` (+ tests with real-shape fixtures); `sdk-adapter.ts` (import the mappers, capture stream `agent_id`); `run-one-brief.ts` (`RunOutcome.durationMs`, agent_id wiring); `manifest.ts` (`wall_clock_ms` + token note); `collect-run.ts` + `prepare-run.ts` (`preexistingTracePaths` snapshot + exclusion); `run-arm.ts` (thread `wall_clock_ms` into the enriched manifest); the spike artifact; SKILL.md / KNOWN-ISSUES note on the token gap; new tests wired into `test:scripts`.
+
+**Out:** a non-SDK token source (Cursor admin/usage API, CLI telemetry) — deferred, out of scope (spike decision). The k=N A/B loop, aggregation, CI gate — TML-2737. The judge — TML-2736.
+
+## Pre-investigated edge cases
+
+| Edge case | Disposition | Notes |
+|---|---|---|
+| Local runtime emits no usage event | Documented, not fixed | Confirmed by spike; `tokens: null` + note is the honest record. |
+| Real trace landed in gitignored `wip/drive-trace/` | Drove the design | Snapshot-exclusion (not git-diff) is why scoping works for gitignored traces. |
+| `agent_id` present on stream but not outcome | Core of the fix | Capture from the stream message, not `wait()`. |
+| Multiple run-emitted traces remain after exclusion | Matching handles it | `agent_id` match, else newest, over the run-emitted set. |
+
+## Slice-specific done conditions
+
+- [ ] A test feeds the **real captured** `status`/`assistant`/outcome shapes (from the spike) through `sdk-events.ts` and asserts `agent_id` + `durationMs` extraction — with `@cursor/sdk` not installed.
+- [ ] A `collect-run` test with a baseline-committed trace + a run-emitted trace asserts only the latter is returned.
+
+## Open Questions
+
+1. **Snapshot `preexistingTracePaths` in `prepare-run` vs re-scan in `collect-run`?** Working position: snapshot in `prepare-run` (deterministic, captures the exact pre-run state) and pass it through `PreparedRun`. Re-scanning in `collect-run` would race any late base writes.
+
+## References
+
+- Parent project: `projects/drive-judge-harness/spec.md`
+- Spike: `projects/drive-judge-harness/spikes/2026-05-31-sdk-token-usage-retrieval.md`
+- Linear: [TML-2757](https://linear.app/prisma-company/issue/TML-2757) (blocks TML-2737)
+- Surfaces: `skills-contrib/drive-judge-harness/{sdk-adapter,run-one-brief,manifest,collect-run,prepare-run,run-arm}.ts`
+- First-run evidence: manifest at `run-arm-i12-…/run-manifest.json` (agent_id null, tokens 0, polluted trace list)
diff --git a/projects/drive-judge-harness/spikes/2026-05-31-sdk-token-usage-retrieval.md b/projects/drive-judge-harness/spikes/2026-05-31-sdk-token-usage-retrieval.md
new file mode 100644
index 0000000000..34ea18bf96
--- /dev/null
+++ b/projects/drive-judge-harness/spikes/2026-05-31-sdk-token-usage-retrieval.md
@@ -0,0 +1,26 @@
+# Spike: can per-run token usage be retrieved from `@cursor/sdk` for a local run?
+
+**Date:** 2026-05-31 · **Trigger:** the first live `run-arm` (composer-2.5, i12-halt) returned `tokens: {all zero}`. **Question:** is the token signal — our stated #1 efficiency metric after correctness — obtainable from the SDK for a *local-runtime* run, via the stream, the run outcome, the `analytics` surface, or the cloud-API `getRun`?
+
+## Answer
+
+**No. Token usage is not retrievable via the `@cursor/sdk` public surface for local runs, by any path.** Wall-clock (`durationMs`) is available and becomes the primary efficiency metric; `tokens` is honestly `null` from the runtime.
+
+## Evidence (`@cursor/sdk@1.0.15`)
+
+A throwaway probe spawned a trivial local run and dumped every stream message + the `wait()` outcome:
+
+- **Stream messages** — only two types across the whole run: `status` `{ type, agent_id, run_id, status }` and `assistant` `{ type, agent_id, run_id, message }`. **No `turnEnded` / `usage` event is emitted by the local runtime.** (The SDK *does* define a `usage: { inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens }` schema, but it rides on a `turnEnded` update that only the **cloud** runtime streams.)
+- **Run outcome** (`wait()`) — `{ id, status, result, model, durationMs }`. Carries wall-clock (`durationMs`), no tokens. `agent_id` is **not** here; it is on the stream messages.
+- **`analytics.d.ts`** — emit-only outbound telemetry (`trackSdkRunCreated/Completed/SendLatency`, `flushSdkAnalytics`). No read-back API. The event props (`SdkRunCreatedProps`, `SdkRunCompletedProps`, `SdkRunSendLatencyProps`) carry `turn_count`, latency, `end_reason` — **no token counts**.
+- **`cloud-api-client` `getRun({agentId,runId}) → V1Run`** — `{ id, agentId, status, createdAt, updatedAt, durationMs?, result?, git? }`. **No tokens.** `RunResultMetadata` and `executor-types.d.ts` have zero token/usage/cost fields. (Also a cloud-agent query; a local run is not necessarily registered there.)
+
+## Decision (re-route)
+
+Proceed on **option (d)** — accept the SDK gap and record wall-clock instead of tokens:
+
+- Capture `durationMs` (wall-clock) from the run outcome → the primary Tier-2 efficiency metric for local runs.
+- `tokens` stays `null` for local runs, with an explicit manifest note + a documented SDK limitation (consumption gotcha). Not a bug in our extraction — there is nothing to extract.
+- A future token source must come from **outside** the SDK (a Cursor admin/usage API, or CLI-internal telemetry). Out of scope for the fix slice.
+
+Companion clean fixes (same slice): capture `agent_id` from the stream `status` message; scope `collect-run` to traces emitted *during* the run (exclude baseline-committed traces).
diff --git a/skills-contrib/drive-judge-harness/KNOWN-ISSUES.md b/skills-contrib/drive-judge-harness/KNOWN-ISSUES.md
index 08d8abff2d..1a9d53e776 100644
--- a/skills-contrib/drive-judge-harness/KNOWN-ISSUES.md
+++ b/skills-contrib/drive-judge-harness/KNOWN-ISSUES.md
@@ -52,3 +52,21 @@ The token-usage signal this harness needs comes from `TurnEndedUpdate.usage`, wh
 - read the per-turn `usage` field through a small, explicitly-bounded structural view in `sdk-adapter.ts` (guarded at runtime; no bare casts) rather than a fabricated full mirror of the SDK's types.
 
 When upstream ships resolvable types, replace that structural view with the real `TurnEndedUpdate` import and delete the workaround.
+
+## 2. The local runtime emits no token-usage signal at all
+
+Distinct from (and more fundamental than) the type-resolution gap above: even at **runtime**, the `@cursor/sdk` *local* runtime never emits a usage signal, so there is nothing to read regardless of types.
+
+Confirmed by a probe against `@cursor/sdk@1.0.15`:
+
+- The local `run.stream()` yields only `status` and `assistant` messages — **no `turnEnded`/`usage` event** (that update is streamed only by the *cloud* runtime).
+- The `run.wait()` outcome (`{ id, status, result, model, durationMs }`) carries wall-clock but **no tokens**.
+- The cloud `getRun → V1Run` (`{ id, agentId, status, createdAt, updatedAt, durationMs?, result?, git? }`), `RunResultMetadata`, and the `analytics` surface (emit-only `trackSdkRun*`; props carry `turn_count`/latency/`end_reason`) all carry **no token counts**.
+
+### Impact on this harness
+
+For `@cursor/sdk` local-runtime runs (`--runtime cursor`), `tokens` is `null` (with a manifest note), and **`wall_clock_ms` (the outcome's `durationMs`) is the primary efficiency metric.** `accumulateUsage` remains wired, so usage flows automatically if a cloud run (which does stream `turnEnded`) is used, or once a non-SDK local token source exists.
+
+### Suggested fix (upstream)
+
+Stream `turnEnded` (with `usage`) from the local runtime as the cloud runtime already does, or expose per-run token counts on the run outcome / a queryable usage API.
diff --git a/skills-contrib/drive-judge-harness/SKILL.md b/skills-contrib/drive-judge-harness/SKILL.md
index 9f1c7fc579..da63dac466 100644
--- a/skills-contrib/drive-judge-harness/SKILL.md
+++ b/skills-contrib/drive-judge-harness/SKILL.md
@@ -2,14 +2,15 @@
 name: drive-judge-harness
 description: >
   Spawns one Drive orchestrator run on a golden-case brief with a pinned model,
-  accumulates per-run token usage, and writes a run manifest — the corpus
-  generator the Drive LLM judge calibrates against. Supports a pinned skill-bundle
-  input (prepare-run → spawn → collect-run) so runs are reproducible against a
-  known base ref + skill version. Use when you want to run a golden Drive brief
-  end-to-end, produce a natively-instrumented run, accumulate token usage from the
-  Cursor SDK, or validate the post-hoc trace parser against a transcript corpus.
-  Live execution is gated behind --live + CURSOR_API_KEY; the default is a dry-run
-  that makes no live call.
+  records per-run token usage / USD cost / wall-clock, and writes a run manifest —
+  the corpus generator the Drive LLM judge calibrates against. Runs on the Claude
+  Agent SDK by default (Cursor SDK selectable via --runtime cursor). Supports a
+  pinned skill-bundle input (prepare-run → spawn → collect-run) so runs are
+  reproducible against a known base ref + skill version. Use when you want to run a
+  golden Drive brief end-to-end, produce a natively-instrumented run, capture
+  token/cost/wall-clock, or validate the post-hoc trace parser against a transcript
+  corpus. Live execution is gated behind --live + the runtime's API key; the default
+  is a dry-run that makes no live call.
 ---
 
 # Drive: Judge harness (run-one-brief / run-arm)
@@ -48,24 +49,56 @@ builds the k=N A/B loop on top of.
 - `run-one-brief.ts` — `runOneBrief(config, deps)` + a CLI. Owns the
   live-execution gate and orchestration. Accepts `runDir` so the orchestrator
   spawns inside the prepared checkout.
-- `sdk-adapter.ts` — the **only** module that touches `@cursor/sdk`, via a
-  dynamic import reached solely on the live path. Uses the `cwd` passed from
-  `run-one-brief` rather than the harness's `process.cwd()`.
+- `claude-adapter.ts` — the **default** live runtime: the only module that
+  imports `@anthropic-ai/claude-agent-sdk`, via a dynamic import reached solely on
+  the live claude path. Runs the orchestrator with the injected `.claude/skills/`
+  loaded (`settingSources: ['project']`, `skills: 'all'`) inside the prepared
+  checkout, unattended (`permissionMode: 'bypassPermissions'`), with an optional
+  `maxBudgetUsd` hard cap. Reports tokens + `total_cost_usd` + `duration_ms` +
+  `num_turns` natively.
+- `sdk-adapter.ts` — the **secondary** Cursor runtime: the only module that
+  touches `@cursor/sdk`, via a dynamic import reached solely on the live cursor
+  path. Selected with `--runtime cursor`.
+- `claude-events.ts` / `sdk-events.ts` — pure message-shape mappers for each
+  runtime (no SDK import), so the adapters' extraction logic is unit-testable with
+  neither SDK installed.
 - `validate-parser.ts` — validates `drive-diagnose-run/posthoc.ts` over a
   transcript corpus, tallying reconstruction confidence (clears TML-2728).
 - `judge/` — the bespoke-minimal LLM judge (TML-2736). Grades one Drive run
   from its diff + acceptance set + trace excerpts and emits the `intent`
   correctness component the scorecard already reads. See § The LLM judge below.
 
+## Runtimes (claude default, cursor secondary)
+
+The harness is decoupled from any one agent vendor by a small seam in
+`run-one-brief.ts` (`CreateAgent` / `OrchestratorRun` / `RunOutcome`); each runtime
+is one adapter behind it, selected with `--runtime <claude|cursor>` (default
+`claude`).
+
+- **`claude`** (default) — Anthropic's Claude Agent SDK. The native home of the
+  SKILL.md + subagent conventions the drive-* skills use, and its terminal result
+  message reports per-run **tokens, USD cost, wall-clock, and turn count** (manifest
+  fields `tokens`, `cost_usd`, `wall_clock_ms`, `num_turns`). Key: `ANTHROPIC_API_KEY`.
+  Supports `--max-budget-usd <n>` (a hard per-run dollar cap; the run aborts with
+  an error result if the estimate reaches it).
+- **`cursor`** (secondary) — `@cursor/sdk`. Kept for spot-checking the Cursor
+  substrate. Its **local runtime emits no token usage** (see KNOWN-ISSUES.md § 2 and
+  the spike), so cursor runs record `tokens: null` and rely on `wall_clock_ms`
+  alone. Key: `CURSOR_API_KEY`.
+
+The model is pinned with `--model` (e.g. `claude-haiku-4-5` for cheap calibration,
+`claude-sonnet-4-5` for realistic runs); the harness never hardcodes one.
+
 ## The live-execution gate (safety property)
 
-A live run requires **both** `--live` **and** a present `CURSOR_API_KEY`.
-Otherwise the harness takes the **dry-run** path: it never imports `@cursor/sdk`,
-never makes a network call, and writes a manifest with `status: "dry-run"`,
-`tokens: null`. Because the SDK is reached only through `sdk-adapter.ts`'s
-dynamic import on the live path, **typecheck / test / lint / CI all stay green
-with no `CURSOR_API_KEY` set and `@cursor/sdk` not installed.** Tests inject a
-mock `createAgent` and never make a live call.
+A live run requires **both** `--live` **and** the selected runtime's API key
+present (`ANTHROPIC_API_KEY` for claude, `CURSOR_API_KEY` for cursor). Otherwise
+the harness takes the **dry-run** path: it never imports an SDK, never makes a
+network call, and writes a manifest with `status: "dry-run"`, `tokens: null`.
+Because each SDK is reached only through its adapter's dynamic import on the live
+path, **typecheck / test / lint / CI all stay green with no API key set and
+neither SDK installed.** Tests inject a mock `createAgent` and never make a live
+call.
 
 ## The pinned skill-bundle pipeline (run-arm)
 
@@ -168,6 +201,18 @@ trace via the emitter. The spawned orchestrator self-instruments its Drive
 methodology events into `--trace-file` via `drive-record-traces`; the harness
 owns only the token manifest.
 
+**Token availability is per-runtime.** On the default **claude** runtime the
+terminal result message carries cumulative `usage` + `total_cost_usd`, so a run
+records real `tokens` **and** `cost_usd` (plus `num_turns` and `wall_clock_ms`).
+On the secondary **cursor** runtime the *local* `@cursor/sdk` runtime emits no
+usage signal at all — nothing in its stream, run outcome, `getRun`/`V1Run` cloud
+query, or `analytics` surface carries token counts (see KNOWN-ISSUES.md § 2 for
+the probe findings) — so cursor runs record `tokens: null` with a note, and
+fall back to **`wall_clock_ms`** as the efficiency metric. `run-one-brief` reads
+`tokens` from the run outcome when the runtime provides them (claude), else
+accumulates per-turn usage (the cursor path), so both runtimes feed the same
+`TokenTotals` shape.
+
 ## The LLM judge (`judge/`)
 
 A bespoke-minimal grader that turns the run's artifacts (diff + golden
diff --git a/skills-contrib/drive-judge-harness/claude-adapter.ts b/skills-contrib/drive-judge-harness/claude-adapter.ts
new file mode 100644
index 0000000000..e142719186
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/claude-adapter.ts
@@ -0,0 +1,94 @@
+import { outcomeFromResult, streamEventFromMessage } from './claude-events.ts';
+import type { CreateAgent, OrchestratorRun, RunOutcome } from './run-one-brief.ts';
+import { isRecord } from './sdk-events.ts';
+
+// The ONLY module that imports `@anthropic-ai/claude-agent-sdk`, loaded lazily
+// by run-one-brief on the live claude path. Never reached under test (tests
+// inject a mock `createAgent`). The SDK is not installed during development or
+// CI; the dynamic import path is the only gate.
+//
+// `query({ prompt, options })` returns an async generator of messages. We iterate
+// the entire generator in `stream()`, yielding normalized RunStreamEvents, and
+// capture the terminal `result` message for `wait()` to consume. run-one-brief
+// drains `stream()` fully before calling `wait()`, so any `result` the SDK emitted
+// has been captured by then; if none was emitted, `wait()` reports `error`.
+//
+// Pure message-shape mappers live in `claude-events.ts` — no SDK import there,
+// fully unit-testable with the SDK absent.
+
+/** Normalize an SDK `query()` generator into the harness's `OrchestratorRun`. */
+function adaptQuery(generator: AsyncIterable<unknown>): OrchestratorRun {
+  let capturedResult: unknown = null;
+  return {
+    async *stream() {
+      for await (const msg of generator) {
+        if (isResultMessage(msg)) {
+          capturedResult = msg;
+        }
+        yield streamEventFromMessage(msg);
+      }
+    },
+    async wait(): Promise<RunOutcome> {
+      const parsed = capturedResult !== null ? outcomeFromResult(capturedResult) : null;
+      if (parsed === null) {
+        return {
+          status: 'error',
+          runId: null,
+          agentId: null,
+          durationMs: null,
+          tokens: null,
+          costUsd: null,
+          numTurns: null,
+        };
+      }
+      return {
+        status: parsed.status,
+        runId: parsed.runId,
+        agentId: null,
+        durationMs: parsed.durationMs,
+        tokens: parsed.tokens,
+        costUsd: parsed.costUsd,
+        numTurns: parsed.numTurns,
+      };
+    },
+  };
+}
+
+function isResultMessage(msg: unknown): msg is Record<string, unknown> {
+  return isRecord(msg) && msg.type === 'result';
+}
+
+function isAsyncIterable(v: unknown): v is AsyncIterable<unknown> {
+  return v !== null && typeof v === 'object' && Symbol.asyncIterator in v;
+}
+
+/** Live `CreateAgent` backed by `@anthropic-ai/claude-agent-sdk`. Reached only
+ *  on the live claude path. */
+export const createClaudeAgent: CreateAgent = async ({ model, prompt, cwd, maxBudgetUsd }) => {
+  const apiKey = process.env.ANTHROPIC_API_KEY;
+  if (typeof apiKey !== 'string' || apiKey.length === 0) {
+    throw new Error('ANTHROPIC_API_KEY is required for a live claude run');
+  }
+
+  // Dynamic import keeps this module evaluatable (typecheck/lint/test) with
+  // @anthropic-ai/claude-agent-sdk absent from node_modules.
+  const { query } = await import('@anthropic-ai/claude-agent-sdk');
+
+  const options: Record<string, unknown> = {
+    cwd,
+    model,
+    settingSources: ['project'],
+    skills: 'all',
+    permissionMode: 'bypassPermissions',
+    allowDangerouslySkipPermissions: true,
+  };
+  if (maxBudgetUsd != null) {
+    options.maxBudgetUsd = maxBudgetUsd;
+  }
+
+  const rawResult: unknown = query({ prompt, options });
+  if (!isAsyncIterable(rawResult)) {
+    throw new Error('SDK query() did not return an AsyncIterable');
+  }
+  return adaptQuery(rawResult);
+};
diff --git a/skills-contrib/drive-judge-harness/claude-events.ts b/skills-contrib/drive-judge-harness/claude-events.ts
new file mode 100644
index 0000000000..6520ee20e1
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/claude-events.ts
@@ -0,0 +1,82 @@
+import type { RunStreamEvent } from './run-one-brief.ts';
+import { asString, isRecord } from './sdk-events.ts';
+import { accumulateUsage, type TokenTotals, type TurnUsage } from './usage.ts';
+
+// Pure message-shape mappers for the Anthropic Claude Agent SDK.
+//
+// These operate over `unknown` and have zero dependency on
+// `@anthropic-ai/claude-agent-sdk`, so they can be unit-tested with the SDK
+// absent. The sole SDK importer remains `claude-adapter.ts`, which wires these
+// utilities into the live path.
+//
+// Real shapes from @anthropic-ai/claude-agent-sdk (confirmed from SDK docs):
+//
+//   stream assistant: { type: "assistant", message: { id, usage: {
+//                         input_tokens, output_tokens,
+//                         cache_creation_input_tokens,
+//                         cache_read_input_tokens } } }
+//   terminal result:  { type: "result", subtype: "success"|"error_max_turns"|...,
+//                       session_id, duration_ms, num_turns, total_cost_usd,
+//                       usage: { input_tokens, output_tokens,
+//                                cache_creation_input_tokens,
+//                                cache_read_input_tokens } }
+//
+// Field mapping (SDK snake_case -> harness camelCase):
+//   input_tokens               -> inputTokens
+//   output_tokens              -> outputTokens
+//   cache_read_input_tokens    -> cacheReadTokens
+//   cache_creation_input_tokens -> cacheWriteTokens
+
+function mapUsage(usage: Record<string, unknown>): TurnUsage {
+  const num = (v: unknown): number | null => (typeof v === 'number' ? v : null);
+  return {
+    inputTokens: num(usage.input_tokens),
+    outputTokens: num(usage.output_tokens),
+    cacheReadTokens: num(usage.cache_read_input_tokens),
+    cacheWriteTokens: num(usage.cache_creation_input_tokens),
+  };
+}
+
+/** Read usage from an assistant message's nested `message.usage` object.
+ *  Returns null if the message is not an assistant type or has no usage. */
+export function usageFromAssistant(msg: unknown): TurnUsage | null {
+  if (!isRecord(msg) || msg.type !== 'assistant') return null;
+  const message = msg.message;
+  if (!isRecord(message)) return null;
+  const usage = message.usage;
+  if (!isRecord(usage)) return null;
+  return mapUsage(usage);
+}
+
+/** Map a raw Claude SDK stream message onto a normalized `RunStreamEvent`.
+ *  An assistant message with usage maps to `turn-ended`; everything else is `other`. */
+export function streamEventFromMessage(msg: unknown): RunStreamEvent {
+  const usage = usageFromAssistant(msg);
+  if (usage !== null) return { kind: 'turn-ended', usage };
+  return { kind: 'other' };
+}
+
+/** Map a raw Claude SDK terminal `result` message to the harness outcome fields.
+ *  Returns null when `msg` is not a record or its `type` is not `'result'`. */
+export function outcomeFromResult(msg: unknown): {
+  status: 'finished' | 'error';
+  runId: string | null;
+  tokens: TokenTotals | null;
+  durationMs: number | null;
+  costUsd: number | null;
+  numTurns: number | null;
+} | null {
+  if (!isRecord(msg) || msg.type !== 'result') return null;
+  const status: 'finished' | 'error' = msg.subtype === 'success' ? 'finished' : 'error';
+  const runId = asString(msg.session_id);
+  const durationMs = typeof msg.duration_ms === 'number' ? msg.duration_ms : null;
+  const costUsd = typeof msg.total_cost_usd === 'number' ? msg.total_cost_usd : null;
+  const numTurns = typeof msg.num_turns === 'number' ? msg.num_turns : null;
+
+  const usageRaw = msg.usage;
+  const tokens: TokenTotals | null = isRecord(usageRaw)
+    ? accumulateUsage([mapUsage(usageRaw)])
+    : null;
+
+  return { status, runId, tokens, durationMs, costUsd, numTurns };
+}
diff --git a/skills-contrib/drive-judge-harness/collect-run.ts b/skills-contrib/drive-judge-harness/collect-run.ts
index fb2e70b123..51935332b6 100644
--- a/skills-contrib/drive-judge-harness/collect-run.ts
+++ b/skills-contrib/drive-judge-harness/collect-run.ts
@@ -1,9 +1,9 @@
 import { spawnSync } from 'node:child_process';
-import { type Dirent, readdirSync, readFileSync, statSync } from 'node:fs';
+import { readFileSync, statSync } from 'node:fs';
 import { type } from 'arktype';
-import { join } from 'pathe';
 import { Slice1TraceEvent } from '../drive-record-traces/schema.ts';
 import type { PreparedRun } from './prepare-run.ts';
+import { findJsonlFiles } from './trace-files.ts';
 
 export type CollectedRun = {
   tracePaths: string[];
@@ -13,25 +13,6 @@ export type CollectedRun = {
   untraced: boolean;
 };
 
-function findJsonlFiles(dir: string): string[] {
-  const results: string[] = [];
-  let entries: Dirent[];
-  try {
-    entries = readdirSync(dir, { withFileTypes: true });
-  } catch {
-    return results;
-  }
-  for (const entry of entries) {
-    const fullPath = join(dir, entry.name);
-    if (entry.isDirectory()) {
-      results.push(...findJsonlFiles(fullPath));
-    } else if (entry.isFile() && entry.name.endsWith('.jsonl')) {
-      results.push(fullPath);
-    }
-  }
-  return results;
-}
-
 function firstLineOf(filePath: string): string | null {
   let content: string;
   try {
@@ -106,9 +87,10 @@ export function collectRun(
   prepared: PreparedRun,
   opts?: { agentId?: string | null },
 ): CollectedRun {
-  const { runDir, prepareCommit } = prepared;
+  const { runDir, prepareCommit, preexistingTracePaths } = prepared;
 
-  const allJsonl = findJsonlFiles(runDir);
+  const preexistingSet = new Set(preexistingTracePaths);
+  const allJsonl = findJsonlFiles(runDir).filter((p) => !preexistingSet.has(p));
   const tracePaths = allJsonl.filter(isValidTrace);
 
   let matchedTrace: string | null = null;
diff --git a/skills-contrib/drive-judge-harness/manifest.ts b/skills-contrib/drive-judge-harness/manifest.ts
index a2ed635a2d..6b1e14ca31 100644
--- a/skills-contrib/drive-judge-harness/manifest.ts
+++ b/skills-contrib/drive-judge-harness/manifest.ts
@@ -23,13 +23,22 @@ export type RunManifest = {
   schema_version: '1';
   case_slug: string;
   model: string;
+  /** The adapter runtime used for this run. */
+  runtime: 'claude' | 'cursor';
   status: RunStatus;
   run_id: string | null;
   agent_id: string | null;
   trace_file: string;
   /** Accumulated per-run usage, or `null` when no live run produced a signal
-   *  (dry-run, or a startup failure before any turn completed). */
+   *  (dry-run, startup failure, or the cursor local runtime, which emits no usage events). */
   tokens: TokenTotals | null;
+  /** Wall-clock duration reported by the SDK (`wait()` outcome `durationMs`), or
+   *  `null` for dry-run, startup-failed, and thrown-error paths, or if the SDK reports none. */
+  wall_clock_ms: number | null;
+  /** Total USD cost reported by the runtime, or `null` when unavailable. */
+  cost_usd: number | null;
+  /** Number of turns reported by the runtime, or `null` when unavailable. */
+  num_turns: number | null;
   started_at: string;
   finished_at: string | null;
   notes: string[];
diff --git a/skills-contrib/drive-judge-harness/prepare-run.ts b/skills-contrib/drive-judge-harness/prepare-run.ts
index cec1c45cdc..3c9ce740c8 100644
--- a/skills-contrib/drive-judge-harness/prepare-run.ts
+++ b/skills-contrib/drive-judge-harness/prepare-run.ts
@@ -1,4 +1,5 @@
 import { spawnSync } from 'node:child_process';
+import { findJsonlFiles } from './trace-files.ts';
 
 export type SkillBundleRef = {
   repoDir: string;
@@ -19,6 +20,11 @@ export type PreparedRun = {
   skillBundleSha: string;
   prepareCommit: string;
   materialized: boolean;
+  /** Paths of all `.jsonl` files present under `runDir` immediately after the
+   *  baseline commit — i.e. traces committed in the base checkout before the
+   *  agent run starts. `collectRun` excludes these so only run-emitted traces
+   *  are collected. Deterministic snapshot (no mtime reliance). */
+  preexistingTracePaths: string[];
 };
 
 export type PrepareRunDeps = {
@@ -97,6 +103,8 @@ export function prepareRun(config: PrepareRunConfig, deps?: PrepareRunDeps): Pre
   git(['commit', '--allow-empty', '-m', 'prepare-run baseline'], config.runDir);
   const prepareCommit = git(['rev-parse', 'HEAD'], config.runDir).stdout;
 
+  const preexistingTracePaths = findJsonlFiles(config.runDir);
+
   return {
     runDir: config.runDir,
     baseRef: config.baseRef,
@@ -104,5 +112,6 @@ export function prepareRun(config: PrepareRunConfig, deps?: PrepareRunDeps): Pre
     skillBundleSha,
     prepareCommit,
     materialized: matResult.ok,
+    preexistingTracePaths,
   };
 }
diff --git a/skills-contrib/drive-judge-harness/run-arm.ts b/skills-contrib/drive-judge-harness/run-arm.ts
index 7abc4ac901..bf122cbf00 100644
--- a/skills-contrib/drive-judge-harness/run-arm.ts
+++ b/skills-contrib/drive-judge-harness/run-arm.ts
@@ -23,6 +23,10 @@ export type RunArmConfig = {
   manifestFile: string;
   live: boolean;
   apiKeyPresent: boolean;
+  /** The adapter runtime to use. Defaults to `'claude'` in the CLI. */
+  runtime: 'claude' | 'cursor';
+  /** Optional hard per-run USD budget cap (Claude adapter only). */
+  maxBudgetUsd?: number;
 };
 
 export type RunArmDeps = {
@@ -57,6 +61,8 @@ export async function runArm(config: RunArmConfig, deps?: RunArmDeps): Promise<R
       runDir: prepared.runDir,
       live: config.live,
       apiKeyPresent: config.apiKeyPresent,
+      runtime: config.runtime,
+      maxBudgetUsd: config.maxBudgetUsd,
     },
     { createAgent: deps?.createAgent, now: deps?.now },
   );
@@ -88,8 +94,9 @@ const USAGE =
   'Usage: node skills-contrib/drive-judge-harness/run-arm.ts ' +
   '--repo <repo-dir> --base-ref <ref> --bundle-ref <ref> --run-dir <dir> ' +
   '--case <golden-case-dir> --model <model-id> ' +
-  '[--bundle-repo <dir>] [--manifest-file <path>] [--live]\n' +
-  'Live execution requires both --live and CURSOR_API_KEY.';
+  '[--bundle-repo <dir>] [--manifest-file <path>] [--live] ' +
+  '[--runtime <claude|cursor>] [--max-budget-usd <n>]\n' +
+  'Live execution requires both --live and the runtime API key. Default runtime is claude.';
 
 function parseArgs(argv: string[]): {
   repo?: string;
@@ -101,6 +108,8 @@ function parseArgs(argv: string[]): {
   model?: string;
   manifestFile?: string;
   live: boolean;
+  runtime: 'claude' | 'cursor';
+  maxBudgetUsd?: number;
 } {
   let repo: string | undefined;
   let baseRef: string | undefined;
@@ -111,6 +120,8 @@ function parseArgs(argv: string[]): {
   let model: string | undefined;
   let manifestFile: string | undefined;
   let live = false;
+  let runtime: 'claude' | 'cursor' = 'claude';
+  let maxBudgetUsd: number | undefined;
 
   for (let i = 0; i < argv.length; i++) {
     const arg = argv[i];
@@ -153,12 +164,42 @@ function parseArgs(argv: string[]): {
       case '--live':
         live = true;
         break;
+      case '--runtime': {
+        const val = takeValue();
+        if (val !== 'claude' && val !== 'cursor') {
+          process.stderr.write(`--runtime must be "claude" or "cursor"\n${USAGE}\n`);
+          process.exit(1);
+        }
+        runtime = val;
+        break;
+      }
+      case '--max-budget-usd': {
+        const val = Number(takeValue());
+        if (!Number.isFinite(val) || val <= 0) {
+          process.stderr.write(`--max-budget-usd must be a positive number\n${USAGE}\n`);
+          process.exit(1);
+        }
+        maxBudgetUsd = val;
+        break;
+      }
       default:
         process.stderr.write(`Unknown argument: ${arg}\n${USAGE}\n`);
         process.exit(1);
     }
   }
-  return { repo, baseRef, bundleRef, bundleRepo, runDir, caseDir, model, manifestFile, live };
+  return {
+    repo,
+    baseRef,
+    bundleRef,
+    bundleRepo,
+    runDir,
+    caseDir,
+    model,
+    manifestFile,
+    live,
+    runtime,
+    maxBudgetUsd,
+  };
 }
 
 async function main(): Promise<void> {
@@ -179,6 +220,9 @@ async function main(): Promise<void> {
   const bundleRepoDir = parsed.bundleRepo ?? repoUnderTestDir;
   const manifestFile = parsed.manifestFile ?? join(parsed.runDir, 'run-manifest.json');
   const traceFile = join(parsed.runDir, 'run-trace.jsonl');
+  const runtime = parsed.runtime;
+  const apiKeyEnvVar = runtime === 'cursor' ? 'CURSOR_API_KEY' : 'ANTHROPIC_API_KEY';
+  const apiKeyValue = process.env[apiKeyEnvVar];
 
   const result = await runArm({
     repoUnderTestDir,
@@ -190,8 +234,9 @@ async function main(): Promise<void> {
     traceFile,
     manifestFile,
     live: parsed.live,
-    apiKeyPresent:
-      typeof process.env.CURSOR_API_KEY === 'string' && process.env.CURSOR_API_KEY.length > 0,
+    apiKeyPresent: typeof apiKeyValue === 'string' && apiKeyValue.length > 0,
+    runtime,
+    maxBudgetUsd: parsed.maxBudgetUsd,
   });
 
   process.stdout.write(`${result.manifestContent}\n`);
diff --git a/skills-contrib/drive-judge-harness/run-one-brief.ts b/skills-contrib/drive-judge-harness/run-one-brief.ts
index 313cb02540..6298065a5d 100644
--- a/skills-contrib/drive-judge-harness/run-one-brief.ts
+++ b/skills-contrib/drive-judge-harness/run-one-brief.ts
@@ -28,6 +28,10 @@ export type RunOutcome = {
   status: 'finished' | 'error';
   runId: string | null;
   agentId: string | null;
+  durationMs: number | null;
+  tokens: TokenTotals | null;
+  costUsd: number | null;
+  numTurns: number | null;
 };
 
 /** A started orchestrator run the harness observes. */
@@ -37,11 +41,12 @@ export type OrchestratorRun = {
 };
 
 /** Spawns an orchestrator run for a pinned model + prompt. Injected in tests;
- *  the live default is loaded lazily from `sdk-adapter.ts`. */
+ *  the live default is loaded lazily from the matching adapter module. */
 export type CreateAgent = (opts: {
   model: string;
   prompt: string;
   cwd: string;
+  maxBudgetUsd?: number;
 }) => Promise<OrchestratorRun>;
 
 export type RunOneBriefConfig = {
@@ -54,8 +59,12 @@ export type RunOneBriefConfig = {
   runDir: string;
   /** Caller asked for a live run. */
   live: boolean;
-  /** Whether a Cursor API key is present in the environment. */
+  /** Whether the runtime's API key is present in the environment. */
   apiKeyPresent: boolean;
+  /** The adapter runtime to use. Defaults to `'claude'` in the CLI. */
+  runtime: 'claude' | 'cursor';
+  /** Optional hard per-run USD budget cap (Claude adapter only). */
+  maxBudgetUsd?: number;
 };
 
 export type RunOneBriefDeps = {
@@ -85,11 +94,15 @@ function gateSatisfied(config: RunOneBriefConfig): boolean {
   return config.live && config.apiKeyPresent;
 }
 
-async function defaultCreateAgent(): Promise<CreateAgent> {
-  // Lazy import so `@cursor/sdk` is only required when a live run is actually
+async function defaultCreateAgent(runtime: 'claude' | 'cursor'): Promise<CreateAgent> {
+  // Lazy import so the SDK is only required when a live run is actually
   // requested without an injected agent. Never reached under test.
-  const adapter = await import('./sdk-adapter.ts');
-  return adapter.createCursorAgent;
+  if (runtime === 'cursor') {
+    const adapter = await import('./sdk-adapter.ts');
+    return adapter.createCursorAgent;
+  }
+  const adapter = await import('./claude-adapter.ts');
+  return adapter.createClaudeAgent;
 }
 
 /** Run one brief end-to-end (or dry-run) and write the manifest. */
@@ -107,18 +120,23 @@ export async function runOneBrief(
     model: config.model,
     trace_file: config.traceFile,
     started_at: startedAt,
+    runtime: config.runtime,
   } as const;
 
   if (!gateSatisfied(config)) {
+    const keyName = config.runtime === 'cursor' ? 'CURSOR_API_KEY' : 'ANTHROPIC_API_KEY';
     const reason = !config.live
-      ? 'dry-run: live execution not requested (pass --live and set CURSOR_API_KEY to run live)'
-      : 'dry-run: live requested but CURSOR_API_KEY is absent';
+      ? `dry-run: live execution not requested (pass --live and set ${keyName} to run live)`
+      : `dry-run: live requested but ${keyName} is absent`;
     const manifest: RunManifest = {
       ...baseManifest,
       status: 'dry-run',
       run_id: null,
       agent_id: null,
       tokens: null,
+      wall_clock_ms: null,
+      cost_usd: null,
+      num_turns: null,
       finished_at: now(),
       notes: [reason, 'no SDK call was made; no orchestrator run was spawned'],
     };
@@ -126,12 +144,17 @@ export async function runOneBrief(
     return { status: 'dry-run', manifest, manifestContent, createAgentCalled: false };
   }
 
-  const createAgent = deps.createAgent ?? (await defaultCreateAgent());
+  const createAgent = deps.createAgent ?? (await defaultCreateAgent(config.runtime));
   const prompt = assemblePrompt(golden);
 
   let run: OrchestratorRun;
   try {
-    run = await createAgent({ model: config.model, prompt, cwd: config.runDir });
+    run = await createAgent({
+      model: config.model,
+      prompt,
+      cwd: config.runDir,
+      maxBudgetUsd: config.maxBudgetUsd,
+    });
   } catch (err) {
     const manifest: RunManifest = {
       ...baseManifest,
@@ -139,6 +162,9 @@ export async function runOneBrief(
       run_id: null,
       agent_id: null,
       tokens: null,
+      wall_clock_ms: null,
+      cost_usd: null,
+      num_turns: null,
       finished_at: now(),
       notes: [`startup-failed: ${err instanceof Error ? err.message : String(err)}`],
     };
@@ -154,7 +180,16 @@ export async function runOneBrief(
       }
     }
     const outcome = await run.wait();
-    const tokens: TokenTotals = accumulateUsage(usageUpdates);
+    const accumulatedTokens: TokenTotals | null =
+      usageUpdates.length > 0 ? accumulateUsage(usageUpdates) : null;
+    const tokens: TokenTotals | null = outcome.tokens ?? accumulatedTokens;
+
+    const notes: string[] = [];
+    if (outcome.status === 'finished' && tokens === null) {
+      notes.push(
+        'tokens unavailable: @cursor/sdk local runtime emits no usage events (see KNOWN-ISSUES.md)',
+      );
+    }
 
     const manifest: RunManifest = {
       ...baseManifest,
@@ -162,8 +197,11 @@ export async function runOneBrief(
       run_id: outcome.runId,
       agent_id: outcome.agentId,
       tokens,
+      wall_clock_ms: outcome.durationMs,
+      cost_usd: outcome.costUsd,
+      num_turns: outcome.numTurns,
       finished_at: now(),
-      notes: [],
+      notes,
     };
     const manifestContent = writeManifest(config.manifestFile, manifest);
     return { status: outcome.status, manifest, manifestContent, createAgentCalled: true };
@@ -171,13 +209,17 @@ export async function runOneBrief(
     // A live stream/wait can throw mid-run; write an error manifest with the
     // usage gathered so far so the token signal and the failure survive rather
     // than escaping as an unhandled rejection out of `void main()`.
-    const tokens: TokenTotals = accumulateUsage(usageUpdates);
+    const tokens: TokenTotals | null =
+      usageUpdates.length > 0 ? accumulateUsage(usageUpdates) : null;
     const manifest: RunManifest = {
       ...baseManifest,
       status: 'error',
       run_id: null,
       agent_id: null,
       tokens,
+      wall_clock_ms: null,
+      cost_usd: null,
+      num_turns: null,
       finished_at: now(),
       notes: [`error: ${err instanceof Error ? err.message : String(err)}`],
     };
@@ -193,8 +235,8 @@ export async function runOneBrief(
 const USAGE =
   'Usage: node skills-contrib/drive-judge-harness/run-one-brief.ts ' +
   '--case <golden-case-dir> --model <model-id> [--trace-file <path>] ' +
-  '[--manifest-file <path>] [--live]\n' +
-  'Live execution requires both --live and CURSOR_API_KEY. Default is dry-run.';
+  '[--manifest-file <path>] [--live] [--runtime <claude|cursor>] [--max-budget-usd <n>]\n' +
+  'Live execution requires both --live and the runtime API key. Default runtime is claude.';
 
 function parseArgs(argv: string[]): {
   caseDir?: string;
@@ -202,12 +244,16 @@ function parseArgs(argv: string[]): {
   traceFile?: string;
   manifestFile?: string;
   live: boolean;
+  runtime: 'claude' | 'cursor';
+  maxBudgetUsd?: number;
 } {
   let caseDir: string | undefined;
   let model: string | undefined;
   let traceFile: string | undefined;
   let manifestFile: string | undefined;
   let live = false;
+  let runtime: 'claude' | 'cursor' = 'claude';
+  let maxBudgetUsd: number | undefined;
   for (let i = 0; i < argv.length; i++) {
     const arg = argv[i];
     const takeValue = (): string => {
@@ -238,12 +284,30 @@ function parseArgs(argv: string[]): {
       case '--live':
         live = true;
         break;
+      case '--runtime': {
+        const val = takeValue();
+        if (val !== 'claude' && val !== 'cursor') {
+          process.stderr.write(`--runtime must be "claude" or "cursor"\n${USAGE}\n`);
+          process.exit(1);
+        }
+        runtime = val;
+        break;
+      }
+      case '--max-budget-usd': {
+        const val = Number(takeValue());
+        if (!Number.isFinite(val) || val <= 0) {
+          process.stderr.write(`--max-budget-usd must be a positive number\n${USAGE}\n`);
+          process.exit(1);
+        }
+        maxBudgetUsd = val;
+        break;
+      }
       default:
         process.stderr.write(`Unknown argument: ${arg}\n${USAGE}\n`);
         process.exit(1);
     }
   }
-  return { caseDir, model, traceFile, manifestFile, live };
+  return { caseDir, model, traceFile, manifestFile, live, runtime, maxBudgetUsd };
 }
 
 async function main(): Promise<void> {
@@ -254,6 +318,9 @@ async function main(): Promise<void> {
   }
   const traceFile = parsed.traceFile ?? join(parsed.caseDir, 'run-trace.jsonl');
   const manifestFile = parsed.manifestFile ?? join(parsed.caseDir, 'run-manifest.json');
+  const runtime = parsed.runtime;
+  const apiKeyEnvVar = runtime === 'cursor' ? 'CURSOR_API_KEY' : 'ANTHROPIC_API_KEY';
+  const apiKeyValue = process.env[apiKeyEnvVar];
 
   const result = await runOneBrief({
     caseDir: parsed.caseDir,
@@ -262,8 +329,9 @@ async function main(): Promise<void> {
     manifestFile,
     runDir: process.cwd(),
     live: parsed.live,
-    apiKeyPresent:
-      typeof process.env.CURSOR_API_KEY === 'string' && process.env.CURSOR_API_KEY.length > 0,
+    apiKeyPresent: typeof apiKeyValue === 'string' && apiKeyValue.length > 0,
+    runtime,
+    maxBudgetUsd: parsed.maxBudgetUsd,
   });
 
   process.stdout.write(`${result.manifestContent}\n`);
diff --git a/skills-contrib/drive-judge-harness/sdk-adapter.ts b/skills-contrib/drive-judge-harness/sdk-adapter.ts
index 5660a6b28e..7555f0375d 100644
--- a/skills-contrib/drive-judge-harness/sdk-adapter.ts
+++ b/skills-contrib/drive-judge-harness/sdk-adapter.ts
@@ -1,6 +1,6 @@
 import { Agent } from '@cursor/sdk';
-import type { CreateAgent, OrchestratorRun, RunOutcome, RunStreamEvent } from './run-one-brief.ts';
-import type { TurnUsage } from './usage.ts';
+import type { CreateAgent, OrchestratorRun, RunOutcome } from './run-one-brief.ts';
+import { agentIdFromMessage, outcomeFromResult, streamEventFromMessage } from './sdk-events.ts';
 
 // The ONLY module that touches `@cursor/sdk`, loaded lazily by run-one-brief on
 // the live path, so typecheck / tests / lint / dry-run never require it.
@@ -14,58 +14,10 @@ import type { TurnUsage } from './usage.ts';
 // rather than fabricating a full mirror of the SDK's type surface. When upstream
 // ships self-contained declarations, replace these reads with the real types.
 // See ./KNOWN-ISSUES.md.
-
-function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === 'object' && value !== null && !Array.isArray(value);
-}
-
-function asString(value: unknown): string | null {
-  return typeof value === 'string' ? value : null;
-}
-
-function extractUsage(raw: unknown): TurnUsage | null {
-  if (!isRecord(raw)) return null;
-  const usage = raw.usage;
-  if (!isRecord(usage)) return null;
-  const num = (v: unknown): number | null => (typeof v === 'number' ? v : null);
-  return {
-    inputTokens: num(usage.inputTokens),
-    outputTokens: num(usage.outputTokens),
-    cacheReadTokens: num(usage.cacheReadTokens),
-    cacheWriteTokens: num(usage.cacheWriteTokens),
-  };
-}
-
-function extractText(raw: unknown): string | null {
-  if (!isRecord(raw) || raw.type !== 'assistant') return null;
-  const message = raw.message;
-  if (!isRecord(message)) return null;
-  const content = message.content;
-  if (!Array.isArray(content)) return null;
-  let text = '';
-  for (const block of content) {
-    if (isRecord(block) && block.type === 'text' && typeof block.text === 'string') {
-      text += block.text;
-    }
-  }
-  return text.length > 0 ? text : null;
-}
-
-function toStreamEvent(message: unknown): RunStreamEvent {
-  const usage = extractUsage(message);
-  if (usage !== null) return { kind: 'turn-ended', usage };
-  const text = extractText(message);
-  if (text !== null) return { kind: 'text', text };
-  return { kind: 'other' };
-}
-
-function adaptOutcome(raw: unknown): RunOutcome {
-  if (!isRecord(raw)) {
-    return { status: 'error', runId: null, agentId: null };
-  }
-  const status = raw.status === 'finished' ? 'finished' : 'error';
-  return { status, runId: asString(raw.id), agentId: asString(raw.agentId) };
-}
+//
+// Pure message-shape mappers (isRecord, asString, extractUsage, extractText,
+// streamEventFromMessage, agentIdFromMessage, outcomeFromResult) live in
+// sdk-events.ts — no SDK import there, fully unit-testable without the SDK.
 
 /** Normalize a started SDK run into the harness's `OrchestratorRun`. Reads the
  *  run's `stream()` / `wait()` (documented runtime API); the yielded messages
@@ -74,14 +26,28 @@ function adaptRun(sdkRun: {
   stream(): AsyncIterable<unknown>;
   wait(): Promise<unknown>;
 }): OrchestratorRun {
+  let capturedAgentId: string | null = null;
   return {
     async *stream() {
       for await (const message of sdkRun.stream()) {
-        yield toStreamEvent(message);
+        if (capturedAgentId === null) {
+          capturedAgentId = agentIdFromMessage(message);
+        }
+        yield streamEventFromMessage(message);
       }
     },
-    async wait() {
-      return adaptOutcome(await sdkRun.wait());
+    async wait(): Promise<RunOutcome> {
+      const raw = await sdkRun.wait();
+      const { status, runId, durationMs } = outcomeFromResult(raw);
+      return {
+        status,
+        runId,
+        agentId: capturedAgentId,
+        durationMs,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      };
     },
   };
 }
diff --git a/skills-contrib/drive-judge-harness/sdk-events.ts b/skills-contrib/drive-judge-harness/sdk-events.ts
new file mode 100644
index 0000000000..f208a9d9c6
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/sdk-events.ts
@@ -0,0 +1,85 @@
+import type { RunStreamEvent } from './run-one-brief.ts';
+import type { TurnUsage } from './usage.ts';
+
+// Pure message-shape mappers for the Cursor SDK local runtime.
+//
+// These operate over `unknown` and have no dependency on `@cursor/sdk`, so they
+// can be unit-tested with the SDK absent. The sole SDK importer remains
+// `sdk-adapter.ts`, which imports these utilities and wires them into the live path.
+//
+// Real shapes from @cursor/sdk@1.0.15 local runtime (confirmed via a probe;
+// see KNOWN-ISSUES.md § 2):
+//
+//   stream status:    { type: "status",    agent_id, run_id, status }
+//   stream assistant: { type: "assistant", agent_id, run_id, message }
+//   wait() outcome:   { id, status, result, model, durationMs }
+//                     (no agent_id, no token/usage fields on the local runtime)
+
+export function isRecord(value: unknown): value is Record<string, unknown> {
+  return value !== null && typeof value === 'object' && !Array.isArray(value);
+}
+
+export function asString(value: unknown): string | null {
+  return typeof value !== 'string' ? null : value;
+}
+
+/** Token counts from `raw.usage` (non-number fields → `null`); `null` if either is no record. */
+export function extractUsage(raw: unknown): TurnUsage | null {
+  const counts = isRecord(raw) ? raw.usage : null;
+  if (!isRecord(counts)) return null;
+  const orNull = (field: unknown): number | null => (typeof field === 'number' ? field : null);
+  return {
+    inputTokens: orNull(counts.inputTokens),
+    outputTokens: orNull(counts.outputTokens),
+    cacheReadTokens: orNull(counts.cacheReadTokens),
+    cacheWriteTokens: orNull(counts.cacheWriteTokens),
+  };
+}
+
+/** Join the string `text` blocks of an `assistant` message; `null` if none or wrong shape. */
+export function extractText(raw: unknown): string | null {
+  if (!isRecord(raw) || raw.type !== 'assistant') return null;
+  const envelope = raw.message;
+  const blocks = isRecord(envelope) ? envelope.content : null;
+  if (!Array.isArray(blocks)) return null;
+  const pieces: string[] = [];
+  for (const part of blocks) {
+    if (!isRecord(part) || part.type !== 'text') continue;
+    if (typeof part.text === 'string') pieces.push(part.text);
+  }
+  const joined = pieces.join('');
+  return joined === '' ? null : joined;
+}
+
+/** Classify a raw stream message: usage wins over text; anything else is `other`. */
+export function streamEventFromMessage(message: unknown): RunStreamEvent {
+  const turnUsage = extractUsage(message);
+  if (turnUsage !== null) return { kind: 'turn-ended', usage: turnUsage };
+  // Text is only consulted when the message carried no usage record.
+  const body = extractText(message);
+  return body === null ? { kind: 'other' } : { kind: 'text', text: body };
+}
+
+/** Pull the snake_case `agent_id` off a stream message. Yields `null` when the
+ *  message is not a record or its `agent_id` is not a string. */
+export function agentIdFromMessage(msg: unknown): string | null {
+  const candidate = isRecord(msg) ? msg.agent_id : null;
+  return asString(candidate);
+}
+
+/** Reduce a raw `wait()` result to `status`, `runId` (read from `id`) and `durationMs`.
+ *  Any `status` other than `'finished'` is reported as `'error'`; a non-record
+ *  input yields `{ status: 'error', runId: null, durationMs: null }`. */
+export function outcomeFromResult(raw: unknown): {
+  status: 'finished' | 'error';
+  runId: string | null;
+  durationMs: number | null;
+} {
+  if (!isRecord(raw)) return { status: 'error', runId: null, durationMs: null };
+  const elapsed = raw.durationMs;
+  return {
+    status: raw.status === 'finished' ? 'finished' : 'error',
+    runId: asString(raw.id),
+    durationMs: typeof elapsed === 'number' ? elapsed : null,
+  };
+}
diff --git a/skills-contrib/drive-judge-harness/test/claude-events.test.ts b/skills-contrib/drive-judge-harness/test/claude-events.test.ts
new file mode 100644
index 0000000000..f14a7e41e3
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/test/claude-events.test.ts
@@ -0,0 +1,176 @@
+import assert from 'node:assert/strict';
+import { describe, it } from 'node:test';
+import { outcomeFromResult, streamEventFromMessage, usageFromAssistant } from '../claude-events.ts';
+
+// Real shapes from @anthropic-ai/claude-agent-sdk (confirmed from SDK docs).
+// These tests must pass with @anthropic-ai/claude-agent-sdk NOT installed.
+
+const ASSISTANT_MESSAGE = {
+  type: 'assistant',
+  message: {
+    id: 'msg_01XFDUDYJgAACzvnptvVoYEL',
+    usage: {
+      input_tokens: 33,
+      output_tokens: 904,
+      cache_creation_input_tokens: 53995,
+      cache_read_input_tokens: 230827,
+    },
+  },
+};
+
+const SUCCESS_RESULT = {
+  type: 'result',
+  subtype: 'success',
+  session_id: 'sess-abc123',
+  duration_ms: 16025,
+  num_turns: 9,
+  total_cost_usd: 0.1839242,
+  usage: {
+    input_tokens: 33,
+    output_tokens: 904,
+    cache_creation_input_tokens: 53995,
+    cache_read_input_tokens: 230827,
+  },
+  result: 'done',
+};
+
+const ERROR_MAX_TURNS_RESULT = {
+  type: 'result',
+  subtype: 'error_max_turns',
+  session_id: 'sess-def456',
+  duration_ms: 8000,
+  num_turns: 5,
+  total_cost_usd: 0.05,
+  usage: {
+    input_tokens: 10,
+    output_tokens: 20,
+    cache_creation_input_tokens: 0,
+    cache_read_input_tokens: 0,
+  },
+  result: null,
+};
+
+describe('usageFromAssistant', () => {
+  it('maps all four fields from message.usage', () => {
+    const usage = usageFromAssistant(ASSISTANT_MESSAGE);
+    assert.ok(usage !== null);
+    assert.equal(usage.inputTokens, 33);
+    assert.equal(usage.outputTokens, 904);
+    assert.equal(usage.cacheWriteTokens, 53995);
+    assert.equal(usage.cacheReadTokens, 230827);
+  });
+
+  it('returns null for a non-assistant type', () => {
+    assert.equal(usageFromAssistant({ type: 'result', subtype: 'success' }), null);
+  });
+
+  it('returns null for an assistant message without usage', () => {
+    assert.equal(usageFromAssistant({ type: 'assistant', message: { id: 'x' } }), null);
+  });
+
+  it('returns null for a non-record', () => {
+    assert.equal(usageFromAssistant('junk'), null);
+    assert.equal(usageFromAssistant(null), null);
+    assert.equal(usageFromAssistant(42), null);
+  });
+});
+
+describe('streamEventFromMessage', () => {
+  it('maps an assistant message with usage to {kind:turn-ended}', () => {
+    const event = streamEventFromMessage(ASSISTANT_MESSAGE);
+    assert.equal(event.kind, 'turn-ended');
+    assert.ok(event.kind === 'turn-ended' && event.usage.inputTokens === 33);
+    assert.ok(event.kind === 'turn-ended' && event.usage.cacheWriteTokens === 53995);
+    assert.ok(event.kind === 'turn-ended' && event.usage.cacheReadTokens === 230827);
+  });
+
+  it('maps a result message to {kind:other}', () => {
+    const event = streamEventFromMessage(SUCCESS_RESULT);
+    assert.equal(event.kind, 'other');
+  });
+
+  it('maps junk to {kind:other}', () => {
+    assert.equal(streamEventFromMessage({ type: 'unknown' }).kind, 'other');
+    assert.equal(streamEventFromMessage(null).kind, 'other');
+  });
+});
+
+describe('outcomeFromResult', () => {
+  it('extracts all fields from a success result', () => {
+    const outcome = outcomeFromResult(SUCCESS_RESULT);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.status, 'finished');
+    assert.equal(outcome.runId, 'sess-abc123');
+    assert.equal(outcome.durationMs, 16025);
+    assert.equal(outcome.costUsd, 0.1839242);
+    assert.equal(outcome.numTurns, 9);
+  });
+
+  it('maps token fields correctly on a success result', () => {
+    const outcome = outcomeFromResult(SUCCESS_RESULT);
+    assert.ok(outcome !== null && outcome.tokens !== null);
+    assert.equal(outcome.tokens.inputTokens, 33);
+    assert.equal(outcome.tokens.outputTokens, 904);
+    assert.equal(outcome.tokens.cacheWriteTokens, 53995);
+    assert.equal(outcome.tokens.cacheReadTokens, 230827);
+    assert.equal(outcome.tokens.totalTokens, 33 + 904 + 53995 + 230827);
+  });
+
+  it('maps status=error for error_max_turns subtype', () => {
+    const outcome = outcomeFromResult(ERROR_MAX_TURNS_RESULT);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.status, 'error');
+    assert.equal(outcome.runId, 'sess-def456');
+    assert.equal(outcome.durationMs, 8000);
+    assert.equal(outcome.costUsd, 0.05);
+    assert.equal(outcome.numTurns, 5);
+  });
+
+  it('maps token fields for error_max_turns result', () => {
+    const outcome = outcomeFromResult(ERROR_MAX_TURNS_RESULT);
+    assert.ok(outcome !== null && outcome.tokens !== null);
+    assert.equal(outcome.tokens.inputTokens, 10);
+    assert.equal(outcome.tokens.outputTokens, 20);
+    assert.equal(outcome.tokens.cacheWriteTokens, 0);
+    assert.equal(outcome.tokens.cacheReadTokens, 0);
+    assert.equal(outcome.tokens.totalTokens, 30);
+  });
+
+  it('returns null for a non-result type', () => {
+    assert.equal(outcomeFromResult(ASSISTANT_MESSAGE), null);
+    assert.equal(outcomeFromResult({ type: 'status' }), null);
+  });
+
+  it('returns null for a non-record', () => {
+    assert.equal(outcomeFromResult('junk'), null);
+    assert.equal(outcomeFromResult(null), null);
+  });
+
+  it('sets tokens:null when usage is absent', () => {
+    const { usage: _u, ...noUsage } = SUCCESS_RESULT;
+    const outcome = outcomeFromResult(noUsage);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.tokens, null);
+  });
+
+  it('sets costUsd:null when total_cost_usd is absent', () => {
+    const { total_cost_usd: _c, ...noUsd } = SUCCESS_RESULT;
+    const outcome = outcomeFromResult(noUsd);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.costUsd, null);
+  });
+
+  it('sets durationMs:null when duration_ms is absent', () => {
+    const { duration_ms: _d, ...noDuration } = SUCCESS_RESULT;
+    const outcome = outcomeFromResult(noDuration);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.durationMs, null);
+  });
+
+  it('sets numTurns:null when num_turns is absent', () => {
+    const { num_turns: _n, ...noTurns } = SUCCESS_RESULT;
+    const outcome = outcomeFromResult(noTurns);
+    assert.ok(outcome !== null);
+    assert.equal(outcome.numTurns, null);
+  });
+});
diff --git a/skills-contrib/drive-judge-harness/test/collect-run.test.ts b/skills-contrib/drive-judge-harness/test/collect-run.test.ts
index f54a0cc01e..dc005efca6 100644
--- a/skills-contrib/drive-judge-harness/test/collect-run.test.ts
+++ b/skills-contrib/drive-judge-harness/test/collect-run.test.ts
@@ -61,6 +61,7 @@ function fakePrepared(overrides?: Partial<PreparedRun>): PreparedRun {
     skillBundleSha: 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
     prepareCommit,
     materialized: true,
+    preexistingTracePaths: [],
     ...overrides,
   };
 }
@@ -128,6 +129,87 @@ describe('collectRun — trace collection', () => {
   });
 });
 
+describe('collectRun — preexistingTracePaths exclusion', () => {
+  it('returns only the run-emitted trace, not the pre-existing baseline trace', () => {
+    const baselinePath = join(runDir, 'baseline-trace.jsonl');
+    const runEmittedPath = join(runDir, 'run-emitted-trace.jsonl');
+
+    writeFileSync(baselinePath, `${VALID_TRACE_LINE}\n`);
+    writeFileSync(runEmittedPath, `${VALID_TRACE_LINE}\n`);
+
+    // Simulate: baseline-trace was present before the run started
+    const result = collectRun(fakePrepared({ preexistingTracePaths: [baselinePath] }));
+
+    assert.equal(result.tracePaths.length, 1, 'only one trace should be returned');
+    assert.ok(
+      result.tracePaths[0]?.endsWith('run-emitted-trace.jsonl'),
+      'the returned trace must be the run-emitted one',
+    );
+    assert.ok(
+      !result.tracePaths.some((p) => p.endsWith('baseline-trace.jsonl')),
+      'the baseline-committed trace must not appear in results',
+    );
+    assert.equal(result.untraced, false);
+  });
+
+  it('returns no traces when every valid jsonl is listed in preexistingTracePaths', () => {
+    const baselinePath = join(runDir, 'old-trace.jsonl');
+    writeFileSync(baselinePath, `${VALID_TRACE_LINE}\n`);
+
+    const result = collectRun(fakePrepared({ preexistingTracePaths: [baselinePath] }));
+
+    assert.equal(result.tracePaths.length, 0);
+    assert.equal(result.matchedTrace, null);
+    assert.equal(result.untraced, true);
+  });
+
+  it('agent_id matching runs over the run-emitted set only', () => {
+    const baselinePath = join(runDir, 'baseline-trace.jsonl');
+    const runEmittedPath = join(runDir, 'run-trace.jsonl');
+
+    // Both are valid traces but with different agent IDs.
+    const baselineLine = JSON.stringify({
+      event_id: 'e1',
+      schema_version: '1',
+      ts: '2026-05-31T00:00:00.000Z',
+      project_run_id: 'proj-base',
+      orchestrator_agent_id: 'agent-baseline',
+      event_type: 'dispatch-start',
+      dispatch_id: 'd1',
+      dispatch_name: 'baseline',
+      subagent_type: 'generalPurpose',
+      model: null,
+      parent_dispatch_id: null,
+    });
+    const runLine = JSON.stringify({
+      event_id: 'e2',
+      schema_version: '1',
+      ts: '2026-05-31T00:00:00.000Z',
+      project_run_id: 'proj-run',
+      orchestrator_agent_id: 'agent-run',
+      event_type: 'dispatch-start',
+      dispatch_id: 'd2',
+      dispatch_name: 'run',
+      subagent_type: 'generalPurpose',
+      model: null,
+      parent_dispatch_id: null,
+    });
+
+    writeFileSync(baselinePath, `${baselineLine}\n`);
+    writeFileSync(runEmittedPath, `${runLine}\n`);
+
+    const result = collectRun(fakePrepared({ preexistingTracePaths: [baselinePath] }), {
+      agentId: 'agent-baseline',
+    });
+
+    // 'agent-baseline' is only in the preexisting trace; the run-emitted set has
+    // only 'agent-run'. The exclusion must happen before matching.
+    assert.equal(result.tracePaths.length, 1);
+    assert.ok(result.tracePaths[0]?.endsWith('run-trace.jsonl'));
+    assert.ok(result.matchedTrace?.endsWith('run-trace.jsonl'));
+  });
+});
+
 describe('collectRun — diff excludes injected skill files (baseline-commit cut point)', () => {
   it('diff against prepareCommit omits skill bundle files committed at baseline', () => {
     // Agent changes: only a source file — not the skill files
diff --git a/skills-contrib/drive-judge-harness/test/manifest.test.ts b/skills-contrib/drive-judge-harness/test/manifest.test.ts
index 05a855571d..f4e98c1201 100644
--- a/skills-contrib/drive-judge-harness/test/manifest.test.ts
+++ b/skills-contrib/drive-judge-harness/test/manifest.test.ts
@@ -19,11 +19,15 @@ const dryRunManifest: RunManifest = {
   schema_version: '1',
   case_slug: 'slice-dedupe-generated-imports',
   model: 'claude-4.6-sonnet-high-thinking',
+  runtime: 'claude',
   status: 'dry-run',
   run_id: null,
   agent_id: null,
   trace_file: 'projects/x/trace.jsonl',
   tokens: null,
+  wall_clock_ms: null,
+  cost_usd: null,
+  num_turns: null,
   started_at: '2026-05-30T00:00:00.000Z',
   finished_at: null,
   notes: ['dry-run: live execution gate not satisfied'],
@@ -59,6 +63,7 @@ describe('writeManifest', () => {
       status: 'finished',
       run_id: 'run-1',
       agent_id: 'agent-1',
+      wall_clock_ms: 5000,
       finished_at: '2026-05-30T00:10:00.000Z',
       tokens: {
         inputTokens: 100,
diff --git a/skills-contrib/drive-judge-harness/test/prepare-run.test.ts b/skills-contrib/drive-judge-harness/test/prepare-run.test.ts
index d892b111fc..24ed7963b4 100644
--- a/skills-contrib/drive-judge-harness/test/prepare-run.test.ts
+++ b/skills-contrib/drive-judge-harness/test/prepare-run.test.ts
@@ -162,6 +162,37 @@ describe('prepareRun', () => {
     const prepared = prepareRun(config, { materialize: mockMaterialize });
     assert.equal(prepared.prepareCommit.length, 40);
   });
+
+  it('preexistingTracePaths is empty when the base checkout has no .jsonl files', () => {
+    const config: PrepareRunConfig = {
+      repoUnderTestDir: repoDir,
+      baseRef,
+      skillBundle: { repoDir, ref: bundleRef },
+      runDir,
+    };
+    const prepared = prepareRun(config, { materialize: mockMaterialize });
+    assert.ok(Array.isArray(prepared.preexistingTracePaths));
+    assert.equal(prepared.preexistingTracePaths.length, 0);
+  });
+
+  it('preexistingTracePaths lists committed .jsonl files present at baseline', () => {
+    // Add a .jsonl to the base checkout so it's in the worktree after prepare-run
+    mkdirSync(join(repoDir, 'wip', 'drive-trace'), { recursive: true });
+    writeFileSync(join(repoDir, 'wip', 'drive-trace', 'old-trace.jsonl'), '{"event_id":"e0"}\n');
+    gitIn(repoDir, 'add', '-A');
+    gitIn(repoDir, 'commit', '-m', 'add old trace');
+    const baseRefWithTrace = gitIn(repoDir, 'rev-parse', 'HEAD');
+
+    const config: PrepareRunConfig = {
+      repoUnderTestDir: repoDir,
+      baseRef: baseRefWithTrace,
+      skillBundle: { repoDir, ref: bundleRef },
+      runDir,
+    };
+    const prepared = prepareRun(config, { materialize: mockMaterialize });
+    assert.equal(prepared.preexistingTracePaths.length, 1);
+    assert.ok(prepared.preexistingTracePaths[0]?.endsWith('old-trace.jsonl'));
+  });
 });
 
 describe('prepareRun + collectRun — empty-overlay cut point', () => {
diff --git a/skills-contrib/drive-judge-harness/test/run-arm.test.ts b/skills-contrib/drive-judge-harness/test/run-arm.test.ts
index 946b82fc8c..00ba5a3889 100644
--- a/skills-contrib/drive-judge-harness/test/run-arm.test.ts
+++ b/skills-contrib/drive-judge-harness/test/run-arm.test.ts
@@ -66,7 +66,15 @@ function mockRun(): OrchestratorRun {
   return {
     async *stream() {},
     async wait() {
-      return { status: 'finished', runId: 'run-1', agentId: 'agent-1' };
+      return {
+        status: 'finished' as const,
+        runId: 'run-1',
+        agentId: 'agent-1',
+        durationMs: null,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      };
     },
   };
 }
@@ -85,6 +93,7 @@ function baseConfig(): RunArmConfig {
     manifestFile: join(tmpDir, 'run.json'),
     live: true,
     apiKeyPresent: true,
+    runtime: 'claude',
   };
 }
 
diff --git a/skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts b/skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts
index 0037f562a2..aa5e6b066f 100644
--- a/skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts
+++ b/skills-contrib/drive-judge-harness/test/run-one-brief-cwd.test.ts
@@ -26,7 +26,15 @@ function mockRun(): OrchestratorRun {
   return {
     async *stream() {},
     async wait() {
-      return { status: 'finished', runId: null, agentId: null };
+      return {
+        status: 'finished' as const,
+        runId: null,
+        agentId: null,
+        durationMs: null,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      };
     },
   };
 }
@@ -48,6 +56,7 @@ describe('runOneBrief — cwd thread-through', () => {
         runDir,
         live: true,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent },
     );
@@ -71,6 +80,7 @@ describe('runOneBrief — cwd thread-through', () => {
         runDir,
         live: false,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent },
     );
diff --git a/skills-contrib/drive-judge-harness/test/run-one-brief.test.ts b/skills-contrib/drive-judge-harness/test/run-one-brief.test.ts
index 8c9a4bfbae..235cde360a 100644
--- a/skills-contrib/drive-judge-harness/test/run-one-brief.test.ts
+++ b/skills-contrib/drive-judge-harness/test/run-one-brief.test.ts
@@ -9,9 +9,11 @@ import {
   assemblePrompt,
   type CreateAgent,
   type OrchestratorRun,
+  type RunOutcome,
   type RunStreamEvent,
   runOneBrief,
 } from '../run-one-brief.ts';
+import type { TokenTotals } from '../usage.ts';
 
 const GOLDEN_DIR = fileURLToPath(
   new URL('../../../projects/drive-judge-harness/assets/golden/', import.meta.url),
@@ -28,11 +30,18 @@ afterEach(() => {
 
 const FIXED_NOW = () => '2026-05-30T12:00:00.000Z';
 
+const NULL_OUTCOME: RunOutcome = {
+  status: 'finished',
+  runId: null,
+  agentId: null,
+  durationMs: null,
+  tokens: null,
+  costUsd: null,
+  numTurns: null,
+};
+
 /** A mock orchestrator run that yields synthetic stream events — no network. */
-function mockRun(
-  events: RunStreamEvent[],
-  outcome: Awaited<ReturnType<OrchestratorRun['wait']>>,
-): OrchestratorRun {
+function mockRun(events: RunStreamEvent[], outcome: RunOutcome): OrchestratorRun {
   return {
     async *stream() {
       for (const e of events) yield e;
@@ -48,7 +57,7 @@ describe('runOneBrief — dry-run gate', () => {
     let called = false;
     const createAgent: CreateAgent = async () => {
       called = true;
-      return mockRun([], { status: 'finished', runId: null, agentId: null });
+      return mockRun([], NULL_OUTCOME);
     };
     const result = await runOneBrief(
       {
@@ -59,6 +68,7 @@ describe('runOneBrief — dry-run gate', () => {
         runDir: dir,
         live: false,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -66,13 +76,14 @@ describe('runOneBrief — dry-run gate', () => {
     assert.equal(result.createAgentCalled, false);
     assert.equal(result.status, 'dry-run');
     assert.equal(result.manifest.tokens, null);
+    assert.equal(result.manifest.runtime, 'claude');
   });
 
-  it('does not call createAgent when live is true but no API key', async () => {
+  it('does not call createAgent when live is true but no API key (cursor runtime)', async () => {
     let called = false;
     const createAgent: CreateAgent = async () => {
       called = true;
-      return mockRun([], { status: 'finished', runId: null, agentId: null });
+      return mockRun([], NULL_OUTCOME);
     };
     const result = await runOneBrief(
       {
@@ -83,6 +94,7 @@ describe('runOneBrief — dry-run gate', () => {
         runDir: dir,
         live: true,
         apiKeyPresent: false,
+        runtime: 'cursor',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -91,6 +103,30 @@ describe('runOneBrief — dry-run gate', () => {
     assert.match(result.manifest.notes.join(' '), /CURSOR_API_KEY is absent/);
   });
 
+  it('does not call createAgent when live is true but no API key (claude runtime)', async () => {
+    let called = false;
+    const createAgent: CreateAgent = async () => {
+      called = true;
+      return mockRun([], NULL_OUTCOME);
+    };
+    const result = await runOneBrief(
+      {
+        caseDir: CASE_DIR,
+        traceFile: join(dir, 'trace.jsonl'),
+        manifestFile: join(dir, 'run.json'),
+        model: 'pinned-model',
+        runDir: dir,
+        live: true,
+        apiKeyPresent: false,
+        runtime: 'claude',
+      },
+      { createAgent, now: FIXED_NOW },
+    );
+    assert.equal(called, false);
+    assert.equal(result.status, 'dry-run');
+    assert.match(result.manifest.notes.join(' '), /ANTHROPIC_API_KEY is absent/);
+  });
+
   it('writes a dry-run manifest to disk', async () => {
     const manifestFile = join(dir, 'run.json');
     await runOneBrief(
@@ -102,6 +138,7 @@ describe('runOneBrief — dry-run gate', () => {
         runDir: dir,
         live: false,
         apiKeyPresent: false,
+        runtime: 'claude',
       },
       { now: FIXED_NOW },
     );
@@ -110,6 +147,9 @@ describe('runOneBrief — dry-run gate', () => {
     assert.equal(parsed.case_slug, 'slice-dedupe-generated-imports');
     assert.equal(parsed.model, 'pinned-model');
     assert.equal(parsed.tokens, null);
+    assert.equal(parsed.runtime, 'claude');
+    assert.equal(parsed.cost_usd, null);
+    assert.equal(parsed.num_turns, null);
   });
 });
 
@@ -125,7 +165,15 @@ describe('runOneBrief — live path with mock SDK', () => {
       { kind: 'turn-ended', usage: { inputTokens: 50, outputTokens: 20 } },
     ];
     const createAgent: CreateAgent = async () =>
-      mockRun(events, { status: 'finished', runId: 'run-42', agentId: 'agent-42' });
+      mockRun(events, {
+        status: 'finished',
+        runId: 'run-42',
+        agentId: 'agent-42',
+        durationMs: null,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      });
 
     const manifestFile = join(dir, 'run.json');
     const result = await runOneBrief(
@@ -137,6 +185,7 @@ describe('runOneBrief — live path with mock SDK', () => {
         runDir: dir,
         live: true,
         apiKeyPresent: true,
+        runtime: 'cursor',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -148,6 +197,7 @@ describe('runOneBrief — live path with mock SDK', () => {
     assert.equal(result.manifest.tokens?.outputTokens, 60);
     assert.equal(result.manifest.tokens?.totalTokens, 225);
     assert.equal(result.manifest.run_id, 'run-42');
+    assert.equal(result.manifest.runtime, 'cursor');
 
     const parsed = JSON.parse(readFileSync(manifestFile, 'utf8'));
     assert.equal(parsed.tokens.totalTokens, 225);
@@ -166,6 +216,7 @@ describe('runOneBrief — live path with mock SDK', () => {
         runDir: dir,
         live: true,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -179,7 +230,15 @@ describe('runOneBrief — live path with mock SDK', () => {
       { kind: 'turn-ended', usage: { inputTokens: 10, outputTokens: 2 } },
     ];
     const createAgent: CreateAgent = async () =>
-      mockRun(events, { status: 'error', runId: 'run-err', agentId: null });
+      mockRun(events, {
+        status: 'error',
+        runId: 'run-err',
+        agentId: null,
+        durationMs: null,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      });
     const result = await runOneBrief(
       {
         caseDir: CASE_DIR,
@@ -189,6 +248,7 @@ describe('runOneBrief — live path with mock SDK', () => {
         runDir: dir,
         live: true,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -203,7 +263,15 @@ describe('runOneBrief — live path with mock SDK', () => {
         throw new Error('stream died');
       },
       async wait() {
-        return { status: 'finished', runId: 'unreached', agentId: null };
+        return {
+          status: 'finished' as const,
+          runId: 'unreached',
+          agentId: null,
+          durationMs: null,
+          tokens: null,
+          costUsd: null,
+          numTurns: null,
+        };
       },
     });
     const manifestFile = join(dir, 'run.json');
@@ -216,6 +284,7 @@ describe('runOneBrief — live path with mock SDK', () => {
         runDir: dir,
         live: true,
         apiKeyPresent: true,
+        runtime: 'claude',
       },
       { createAgent, now: FIXED_NOW },
     );
@@ -226,6 +295,118 @@ describe('runOneBrief — live path with mock SDK', () => {
     const parsed = JSON.parse(readFileSync(manifestFile, 'utf8'));
     assert.equal(parsed.status, 'error');
   });
+
+  it('captures agent_id and wall_clock_ms from the outcome, and notes null tokens', async () => {
+    // The outcome carries ids and a duration, but every token/cost/turn field is null.
+    const createAgent: CreateAgent = async () =>
+      mockRun([], {
+        status: 'finished',
+        runId: 'run-live-1',
+        agentId: 'agent-live-1',
+        durationMs: 87654,
+        tokens: null,
+        costUsd: null,
+        numTurns: null,
+      });
+    const expectedNote = 'tokens unavailable: @cursor/sdk local runtime emits no usage events';
+
+    const { status, manifest } = await runOneBrief(
+      {
+        caseDir: CASE_DIR,
+        traceFile: join(dir, 'trace.jsonl'),
+        manifestFile: join(dir, 'run.json'),
+        model: 'pinned-model',
+        runDir: dir,
+        live: true,
+        apiKeyPresent: true,
+        runtime: 'cursor',
+      },
+      { createAgent, now: FIXED_NOW },
+    );
+
+    assert.equal(status, 'finished');
+    assert.equal(manifest.agent_id, 'agent-live-1');
+    assert.equal(manifest.wall_clock_ms, 87654);
+    assert.equal(manifest.tokens, null);
+    // The null token total must be accompanied by a note explaining why it is missing.
+    const noteFound = manifest.notes.some((note) => note.includes(expectedNote));
+    assert.ok(noteFound);
+  });
+
+  it('prefers outcome.tokens over per-turn accumulation when the runtime provides them', async () => {
+    const reportedTotals: TokenTotals = {
+      inputTokens: 33,
+      outputTokens: 904,
+      cacheReadTokens: 230827,
+      cacheWriteTokens: 53995,
+      totalTokens: 285759,
+    };
+    // A per-turn usage event with deliberately different numbers: the outcome must win.
+    const perTurnEvents: RunStreamEvent[] = [
+      { kind: 'turn-ended', usage: { inputTokens: 1, outputTokens: 1 } },
+    ];
+    const createAgent: CreateAgent = async () =>
+      mockRun(perTurnEvents, {
+        status: 'finished',
+        runId: 'sess-abc',
+        agentId: null,
+        durationMs: 16025,
+        tokens: reportedTotals,
+        costUsd: 0.1839242,
+        numTurns: 9,
+      });
+    const manifestFile = join(dir, 'run.json');
+
+    const { status, manifest } = await runOneBrief(
+      {
+        caseDir: CASE_DIR,
+        traceFile: join(dir, 'trace.jsonl'),
+        manifestFile,
+        model: 'pinned-model',
+        runDir: dir,
+        live: true,
+        apiKeyPresent: true,
+        runtime: 'claude',
+      },
+      { createAgent, now: FIXED_NOW },
+    );
+
+    // In-memory manifest: runtime-reported values, not the accumulated per-turn ones.
+    assert.equal(status, 'finished');
+    assert.equal(manifest.runtime, 'claude');
+    assert.equal(manifest.tokens?.inputTokens, 33);
+    assert.equal(manifest.tokens?.outputTokens, 904);
+    assert.equal(manifest.tokens?.totalTokens, 285759);
+    assert.equal(manifest.cost_usd, 0.1839242);
+    assert.equal(manifest.num_turns, 9);
+    assert.equal(manifest.wall_clock_ms, 16025);
+    assert.equal(manifest.notes.length, 0, 'no notes when tokens are present');
+    // The manifest file written to disk must carry the same values.
+    const onDisk = JSON.parse(readFileSync(manifestFile, 'utf8'));
+    assert.equal(onDisk.runtime, 'claude');
+    assert.equal(onDisk.tokens.totalTokens, 285759);
+    assert.equal(onDisk.cost_usd, 0.1839242);
+    assert.equal(onDisk.num_turns, 9);
+    assert.equal(onDisk.wall_clock_ms, 16025);
+  });
+
+  it('runtime:cursor produces runtime:cursor in the manifest', async () => {
+    // Only the runtime label is under test, so the mocked agent emits no stream events.
+    const { manifest } = await runOneBrief(
+      {
+        caseDir: CASE_DIR,
+        traceFile: join(dir, 'trace.jsonl'),
+        manifestFile: join(dir, 'run.json'),
+        model: 'pinned-model',
+        runDir: dir,
+        live: true,
+        apiKeyPresent: true,
+        runtime: 'cursor',
+      },
+      { createAgent: async () => mockRun([], NULL_OUTCOME), now: FIXED_NOW },
+    );
+    assert.equal(manifest.runtime, 'cursor');
+  });
 });
 
 describe('assemblePrompt', () => {
diff --git a/skills-contrib/drive-judge-harness/test/sdk-events.test.ts b/skills-contrib/drive-judge-harness/test/sdk-events.test.ts
new file mode 100644
index 0000000000..4add9598d6
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/test/sdk-events.test.ts
@@ -0,0 +1,147 @@
+import assert from 'node:assert/strict';
+import { describe, it } from 'node:test';
+import {
+  agentIdFromMessage,
+  isRecord,
+  outcomeFromResult,
+  streamEventFromMessage,
+} from '../sdk-events.ts';
+
+// Real shapes from @cursor/sdk@1.0.15 local runtime (captured via probe).
+// Tests here must pass with @cursor/sdk NOT installed.
+
+const STATUS_MESSAGE = {
+  type: 'status', // stream status update: carries ids but no text content and no usage
+  agent_id: 'agent-abc123',
+  run_id: 'run-xyz789',
+  status: 'running',
+};
+
+const ASSISTANT_MESSAGE = {
+  type: 'assistant', // same agent/run ids as STATUS_MESSAGE, plus one text content part
+  agent_id: 'agent-abc123',
+  run_id: 'run-xyz789',
+  message: {
+    content: [{ type: 'text', text: 'Hello from the orchestrator.' }],
+  },
+};
+
+const WAIT_OUTCOME = {
+  id: 'run-xyz789', // keyed as `id` (not `run_id`); this shape has no `agent_id` field
+  status: 'finished',
+  result: 'done',
+  model: 'composer-2.5-fast',
+  durationMs: 42500, // asserted below to surface as the outcome's durationMs
+};
+
+describe('agentIdFromMessage', () => {
+  // Inputs that carry an agent_id: the id must be returned unchanged.
+  const withAgentId: ReadonlyArray<readonly [string, unknown]> = [
+    ['reads agent_id from a status message', STATUS_MESSAGE],
+    ['reads agent_id from an assistant message', ASSISTANT_MESSAGE],
+  ];
+  // Inputs without an agent_id (or that are not objects at all): the result must be null.
+  const withoutAgentId: ReadonlyArray<readonly [string, unknown]> = [
+    ['returns null for the wait() outcome (no agent_id field)', WAIT_OUTCOME],
+    ['returns null for a non-object (string)', 'junk'],
+    ['returns null for a non-object (null)', null],
+    ['returns null for a record with no agent_id', { type: 'other' }],
+  ];
+
+  for (const [title, message] of withAgentId) {
+    it(title, () => {
+      assert.equal(agentIdFromMessage(message), 'agent-abc123');
+    });
+  }
+  for (const [title, message] of withoutAgentId) {
+    it(title, () => {
+      assert.equal(agentIdFromMessage(message), null);
+    });
+  }
+});
+
+describe('outcomeFromResult', () => {
+  it('extracts runId, status=finished, and durationMs from the real outcome shape', () => {
+    const { status, runId, durationMs } = outcomeFromResult(WAIT_OUTCOME);
+    assert.equal(status, 'finished');
+    assert.equal(runId, 'run-xyz789');
+    assert.equal(durationMs, 42500);
+  });
+
+  it('maps status=error for a non-finished status', () => {
+    const failedOutcome = { ...WAIT_OUTCOME, status: 'failed' };
+    assert.equal(outcomeFromResult(failedOutcome).status, 'error');
+  });
+
+  it('returns durationMs:null when durationMs is absent', () => {
+    // Copy the fixture without its durationMs key.
+    const { durationMs: _omitted, ...noDuration } = WAIT_OUTCOME;
+    assert.equal(outcomeFromResult(noDuration).durationMs, null);
+  });
+
+  it('returns durationMs:null when durationMs is not a number', () => {
+    const badDuration = { ...WAIT_OUTCOME, durationMs: 'not-a-number' };
+    assert.equal(outcomeFromResult(badDuration).durationMs, null);
+  });
+
+  // Inputs that are not records at all must degrade to an error outcome with null fields.
+  const nonRecords: ReadonlyArray<readonly [string, unknown]> = [
+    ['degrades to {status:error, runId:null, durationMs:null} for a non-record', 'not-an-object'],
+    ['degrades to {status:error, runId:null, durationMs:null} for null', null],
+  ];
+  for (const [title, input] of nonRecords) {
+    it(title, () => {
+      const { status, runId, durationMs } = outcomeFromResult(input);
+      assert.equal(status, 'error');
+      assert.equal(runId, null);
+      assert.equal(durationMs, null);
+    });
+  }
+});
+
+describe('streamEventFromMessage', () => {
+  // Covers the three event kinds asserted below: other, text, and turn-ended.
+  it('maps a status message to {kind:other} (no usage, no assistant text)', () => {
+    assert.equal(streamEventFromMessage(STATUS_MESSAGE).kind, 'other');
+  });
+
+  it('maps an assistant message with text content to {kind:text}', () => {
+    const mapped = streamEventFromMessage(ASSISTANT_MESSAGE);
+    assert.equal(mapped.kind, 'text');
+    // `kind` is re-checked inline, presumably so the event type narrows before `.text` is read.
+    assert.ok(mapped.kind === 'text' && mapped.text.includes('Hello from the orchestrator.'));
+  });
+
+  it('maps a turn-ended message with usage to {kind:turn-ended}', () => {
+    // The input has only a `usage` field (no `type`); it is expected to map to turn-ended.
+    const usage = { inputTokens: 100, outputTokens: 40, cacheReadTokens: 0, cacheWriteTokens: 0 };
+    const mapped = streamEventFromMessage({ usage });
+    assert.equal(mapped.kind, 'turn-ended');
+    assert.ok(mapped.kind === 'turn-ended' && mapped.usage.inputTokens === 100);
+  });
+
+  it('maps junk to {kind:other}', () => {
+    // An unrecognized `type` with no usage or content is expected to map to 'other' as well.
+    assert.equal(streamEventFromMessage({ type: 'unknown-event' }).kind, 'other');
+  });
+});
+
+describe('isRecord', () => {
+  // Multi-value cases compare the mapped results strictly, so each one must be exactly true/false.
+  it('returns true for plain objects', () => {
+    assert.deepEqual([{}, { a: 1 }].map((value) => isRecord(value)), [true, true]);
+  });
+
+  it('returns false for arrays', () => {
+    assert.equal(isRecord([]), false);
+  });
+
+  it('returns false for null', () => {
+    assert.equal(isRecord(null), false);
+  });
+
+  it('returns false for primitives', () => {
+    // One string and one number stand in for primitive inputs.
+    assert.deepEqual(['string', 42].map((value) => isRecord(value)), [false, false]);
+  });
+});
diff --git a/skills-contrib/drive-judge-harness/trace-files.ts b/skills-contrib/drive-judge-harness/trace-files.ts
new file mode 100644
index 0000000000..efc7cf15b2
--- /dev/null
+++ b/skills-contrib/drive-judge-harness/trace-files.ts
@@ -0,0 +1,23 @@
+import { type Dirent, readdirSync } from 'node:fs';
+import { join } from 'pathe';
+
+/** Recursively collect all `.jsonl` file paths under `dir`, in directory-listing order.
+ *  Returns an empty array when `dir` does not exist or cannot be read. */
+export function findJsonlFiles(dir: string): string[] {
+  let entries: Dirent[];
+  try {
+    entries = readdirSync(dir, { withFileTypes: true });
+  } catch {
+    // Deliberately best-effort: a missing or unreadable directory is treated as empty.
+    return [];
+  }
+  // flatMap keeps listing order and, unlike `push(...subtree)`, never passes a whole
+  // subtree as call arguments, so very large trees cannot hit the engine's argument limit.
+  return entries.flatMap((entry) => {
+    const fullPath = join(dir, entry.name);
+    if (entry.isDirectory()) {
+      return findJsonlFiles(fullPath);
+    }
+    return entry.isFile() && entry.name.endsWith('.jsonl') ? [fullPath] : [];
+  });
+}