From 3bae3ebc49404146038c31db59dc72dc1a20cbee Mon Sep 17 00:00:00 2001 From: Kai Liu Date: Tue, 16 Jun 2026 09:25:15 +0800 Subject: [PATCH] Remove hard agent-turn timeout from cron/task schedulers Cron jobs and one-time tasks both wrapped `agent.sendMessage` in a 2h hard timeout (`ODE_CRON_AGENT_TIMEOUT_MS` / `ODE_TASK_AGENT_TIMEOUT_MS`). Long-running agent workflows (Sentry triage, board grooming, multi-PR sweeps, scheduled audits) routinely tripped this cap and surfaced as "Request timed out" failures even though the underlying agent was making progress. Drop the wrap on both sides. We still keep the prepare-step timeout (session creation + worktree setup) so a wedged setup can't hold the in-process `runningJobIds`/`runningTaskIds` lock; truly hung turns are still recoverable via the daemon restart reconcile path. --- packages/core/cron/scheduler.ts | 37 ++++++++++++++----------------- packages/core/tasks/scheduler.ts | 38 +++++++++++++------------------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/packages/core/cron/scheduler.ts b/packages/core/cron/scheduler.ts index 3eb9efa..a72a561 100644 --- a/packages/core/cron/scheduler.ts +++ b/packages/core/cron/scheduler.ts @@ -54,15 +54,16 @@ const CRON_PREPARE_TIMEOUT_MS = parsePositiveIntEnv( 2 * 60_000 ); -/** - * Hard upper bound for the actual agent turn (`agent.sendMessage`). OpenCode - * sessions can wedge waiting on approvals or remote provider calls; we bound - * the run so a stuck turn doesn't permanently lock out the job. - */ -const CRON_AGENT_TIMEOUT_MS = parsePositiveIntEnv( - process.env.ODE_CRON_AGENT_TIMEOUT_MS, - 2 * 60 * 60_000 -); +// NOTE: There is intentionally no hard upper bound on the agent turn itself +// (`agent.sendMessage`) anymore. Cron jobs that drive long agent workflows +// (Sentry triage, board grooming, multi-PR sweeps) routinely exceeded the +// previous 2h bound and surfaced as `Request timed out` failures even though +// the underlying agent was making progress. We rely on: +// * the prepare-step timeout above to guarantee the in-process +// `runningJobIds` lock can't be wedged by a hung session/worktree setup; +// * the agent adapter's own per-request handling for genuinely stuck turns. +// If a turn really hangs forever, the daemon restart path +// (`reconcileInterruptedCronJobs`) will still surface and retry it. function parsePositiveIntEnv(raw: string | undefined, fallback: number): number { if (!raw) return fallback; @@ -336,17 +337,13 @@ async function runCronJob(job: CronJobRecord, minuteStartMs: number): Promise { }); } - const responses = await withTimeout( - agent.sendMessage( - task.channelId, - sessionId, - task.messageText, - cwd, - options, - buildTaskAgentContext(task), - ), - TASK_AGENT_TIMEOUT_MS, - "Task agent turn", + const responses = await agent.sendMessage( + task.channelId, + sessionId, + task.messageText, + cwd, + options, + buildTaskAgentContext(task), ); const finalText = buildFinalResponseText(responses) ?? "_Done_";