From 6e7e2b232084d16bf16a0353b0a2e2a72b6d6835 Mon Sep 17 00:00:00 2001 From: PeninsulaInd Date: Fri, 20 Mar 2026 12:52:04 -0500 Subject: [PATCH] Add configurable per-task timeouts (env vars, config.yaml, defaults) Execution brain timeout bumped from 15 min to 45 min default to avoid content writing timeouts. BLM stays at 30 min. Both configurable via CHEDDAH_TIMEOUT_EXECUTION_BRAIN / CHEDDAH_TIMEOUT_BLM env vars or config.yaml timeouts section. Co-Authored-By: Claude Opus 4.6 (1M context) --- cheddahbot/agent.py | 5 ++++- cheddahbot/config.py | 17 +++++++++++++++++ cheddahbot/llm.py | 7 +++++-- cheddahbot/tools/linkbuilding.py | 26 ++++++++++++++++++-------- 4 files changed, 44 insertions(+), 11 deletions(-) diff --git a/cheddahbot/agent.py b/cheddahbot/agent.py index dfda94f..b0f552d 100644 --- a/cheddahbot/agent.py +++ b/cheddahbot/agent.py @@ -382,7 +382,10 @@ class Agent: skip_permissions: If True, run CLI with --dangerously-skip-permissions. """ log.info("Execution brain task: %s", prompt[:100]) - kwargs: dict = {"system_prompt": system_context} + kwargs: dict = { + "system_prompt": system_context, + "timeout": self.config.timeouts.execution_brain, + } if tools: kwargs["tools"] = tools if model: diff --git a/cheddahbot/config.py b/cheddahbot/config.py index 024543d..fec8496 100644 --- a/cheddahbot/config.py +++ b/cheddahbot/config.py @@ -98,6 +98,12 @@ class ApiBudgetConfig: alert_threshold: float = 0.8 # alert at 80% of limit +@dataclass +class TimeoutConfig: + execution_brain: int = 2700 # 45 minutes + blm: int = 1800 # 30 minutes + + @dataclass class ContentConfig: cora_inbox: str = "" # e.g.
"Z:/content-cora-inbox" @@ -157,6 +163,7 @@ class Config: autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig) api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig) content: ContentConfig = field(default_factory=ContentConfig) + timeouts: TimeoutConfig = field(default_factory=TimeoutConfig) ntfy: NtfyConfig = field(default_factory=NtfyConfig) agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()]) @@ -221,6 +228,10 @@ def load_config() -> Config: for k, v in data["content"].items(): if hasattr(cfg.content, k): setattr(cfg.content, k, v) + if "timeouts" in data and isinstance(data["timeouts"], dict): + for k, v in data["timeouts"].items(): + if hasattr(cfg.timeouts, k): + setattr(cfg.timeouts, k, int(v)) # ntfy push notifications if "ntfy" in data and isinstance(data["ntfy"], dict): @@ -288,6 +299,12 @@ def load_config() -> Config: if blm_dir := os.getenv("BLM_DIR"): cfg.link_building.blm_dir = blm_dir + # Timeout env var overrides (seconds) + if t := os.getenv("CHEDDAH_TIMEOUT_EXECUTION_BRAIN"): + cfg.timeouts.execution_brain = int(t) + if t := os.getenv("CHEDDAH_TIMEOUT_BLM"): + cfg.timeouts.blm = int(t) + # Ensure data directories exist cfg.data_dir.mkdir(parents=True, exist_ok=True) (cfg.data_dir / "uploads").mkdir(exist_ok=True) diff --git a/cheddahbot/llm.py b/cheddahbot/llm.py index 17ae905..b1cc4ea 100644 --- a/cheddahbot/llm.py +++ b/cheddahbot/llm.py @@ -157,6 +157,7 @@ class LLMAdapter: tools: str = "Bash,Read,Edit,Write,Glob,Grep", model: str | None = None, skip_permissions: bool = False, + timeout: int = 2700, ) -> str: """Execution brain: calls Claude Code CLI with full tool access. @@ -167,6 +168,7 @@ class LLMAdapter: tools: Comma-separated Claude Code tool names (default: standard set). model: Override the CLI model (e.g. "claude-sonnet-4.5"). skip_permissions: If True, append --dangerously-skip-permissions to
the CLI invocation (used for automated pipelines). + timeout: Max seconds to wait for CLI completion (default: 2700 / 45 min). """ claude_bin = shutil.which("claude") @@ -218,10 +220,11 @@ class LLMAdapter: ) try: - stdout, stderr = proc.communicate(input=prompt, timeout=900) + stdout, stderr = proc.communicate(input=prompt, timeout=timeout) except subprocess.TimeoutExpired: proc.kill() - return "Error: Claude Code execution timed out after 15 minutes." + minutes = timeout // 60 + return f"Error: Claude Code execution timed out after {minutes} minutes." if proc.returncode != 0: return f"Execution error: {stderr or 'unknown error'}" diff --git a/cheddahbot/tools/linkbuilding.py b/cheddahbot/tools/linkbuilding.py index a951b3d..9b5b552 100644 --- a/cheddahbot/tools/linkbuilding.py +++ b/cheddahbot/tools/linkbuilding.py @@ -30,6 +30,13 @@ def _get_blm_dir(ctx: dict | None) -> str: return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man") +def _get_blm_timeout(ctx: dict | None) -> int: + """Get BLM subprocess timeout from config or default (1800s / 30 min).""" + if ctx and "config" in ctx: + return ctx["config"].timeouts.blm + return 1800 + + def _run_blm_command( args: list[str], blm_dir: str, timeout: int = 1800 ) -> subprocess.CompletedProcess: @@ -446,10 +453,11 @@ def run_cora_backlinks( cli_flags=cli_flags, ) + blm_timeout = _get_blm_timeout(ctx) try: - ingest_result = _run_blm_command(ingest_args, blm_dir) + ingest_result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout) except subprocess.TimeoutExpired: - error = "ingest-cora timed out after 30 minutes" + error = f"ingest-cora timed out after {blm_timeout // 60} minutes" _set_status(ctx, "") if clickup_task_id: _fail_clickup_task(ctx, clickup_task_id, error) @@ -494,9 +502,9 @@ def run_cora_backlinks( gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"] try: - gen_result = _run_blm_command(gen_args, blm_dir) + gen_result = _run_blm_command(gen_args, blm_dir, timeout=blm_timeout) except subprocess.TimeoutExpired: - error = "generate-batch timed out after 30
minutes" + error = f"generate-batch timed out after {blm_timeout // 60} minutes" _set_status(ctx, "") if clickup_task_id: _fail_clickup_task(ctx, clickup_task_id, error) @@ -584,10 +592,11 @@ def blm_ingest_cora( cli_flags=cli_flags, ) + blm_timeout = _get_blm_timeout(ctx) try: - result = _run_blm_command(ingest_args, blm_dir) + result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout) except subprocess.TimeoutExpired: - return "Error: ingest-cora timed out after 30 minutes." + return f"Error: ingest-cora timed out after {blm_timeout // 60} minutes." parsed = _parse_ingest_output(result.stdout) @@ -638,10 +647,11 @@ def blm_generate_batch( if debug: args.append("--debug") + blm_timeout = _get_blm_timeout(ctx) try: - result = _run_blm_command(args, blm_dir) + result = _run_blm_command(args, blm_dir, timeout=blm_timeout) except subprocess.TimeoutExpired: - return "Error: generate-batch timed out after 30 minutes." + return f"Error: generate-batch timed out after {blm_timeout // 60} minutes." parsed = _parse_generate_output(result.stdout)