Add configurable per-task timeouts (env vars, config.yaml, defaults)

Execution brain timeout bumped from 15 min to 45 min default to avoid
content writing timeouts. BLM stays at 30 min. Both configurable via
CHEDDAH_TIMEOUT_EXECUTION_BRAIN / CHEDDAH_TIMEOUT_BLM env vars or
config.yaml timeouts section.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
PeninsulaInd 2026-03-20 12:52:04 -05:00
parent bdb4c9490a
commit 6e7e2b2320
4 changed files with 44 additions and 11 deletions

View File

@ -382,7 +382,10 @@ class Agent:
skip_permissions: If True, run CLI with --dangerously-skip-permissions.
"""
log.info("Execution brain task: %s", prompt[:100])
kwargs: dict = {"system_prompt": system_context}
kwargs: dict = {
"system_prompt": system_context,
"timeout": self.config.timeouts.execution_brain,
}
if tools:
kwargs["tools"] = tools
if model:

View File

@ -98,6 +98,12 @@ class ApiBudgetConfig:
alert_threshold: float = 0.8 # alert at 80% of limit
# Per-task timeout settings; every value is a duration in seconds.
@dataclass
class TimeoutConfig:
# Max time to wait for an execution brain (Claude Code CLI) call.
execution_brain: int = 2700 # 45 minutes
# Max time to wait for a BLM subprocess command.
blm: int = 1800 # 30 minutes
@dataclass
class ContentConfig:
cora_inbox: str = "" # e.g. "Z:/content-cora-inbox"
@ -157,6 +163,7 @@ class Config:
autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig)
api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig)
content: ContentConfig = field(default_factory=ContentConfig)
timeouts: TimeoutConfig = field(default_factory=TimeoutConfig)
ntfy: NtfyConfig = field(default_factory=NtfyConfig)
agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()])
@ -221,6 +228,10 @@ def load_config() -> Config:
for k, v in data["content"].items():
if hasattr(cfg.content, k):
setattr(cfg.content, k, v)
if "timeouts" in data and isinstance(data["timeouts"], dict):
for k, v in data["timeouts"].items():
if hasattr(cfg.timeouts, k):
setattr(cfg.timeouts, k, int(v))
# ntfy push notifications
if "ntfy" in data and isinstance(data["ntfy"], dict):
@ -288,6 +299,12 @@ def load_config() -> Config:
if blm_dir := os.getenv("BLM_DIR"):
cfg.link_building.blm_dir = blm_dir
# Timeout env var overrides (seconds)
if t := os.getenv("CHEDDAH_TIMEOUT_EXECUTION_BRAIN"):
cfg.timeouts.execution_brain = int(t)
if t := os.getenv("CHEDDAH_TIMEOUT_BLM"):
cfg.timeouts.blm = int(t)
# Ensure data directories exist
cfg.data_dir.mkdir(parents=True, exist_ok=True)
(cfg.data_dir / "uploads").mkdir(exist_ok=True)

View File

@ -157,6 +157,7 @@ class LLMAdapter:
tools: str = "Bash,Read,Edit,Write,Glob,Grep",
model: str | None = None,
skip_permissions: bool = False,
timeout: int = 2700,
) -> str:
"""Execution brain: calls Claude Code CLI with full tool access.
@ -167,6 +168,7 @@ class LLMAdapter:
tools: Comma-separated Claude Code tool names (default: standard set).
model: Override the CLI model (e.g. "claude-sonnet-4.5").
skip_permissions: If True, append --dangerously-skip-permissions to
the CLI invocation (used for automated pipelines).
timeout: Max seconds to wait for CLI completion (default: 2700 / 45 min).
"""
claude_bin = shutil.which("claude")
@ -218,10 +220,11 @@ class LLMAdapter:
)
try:
stdout, stderr = proc.communicate(input=prompt, timeout=900)
stdout, stderr = proc.communicate(input=prompt, timeout=timeout)
except subprocess.TimeoutExpired:
proc.kill()
return "Error: Claude Code execution timed out after 15 minutes."
minutes = timeout // 60
return f"Error: Claude Code execution timed out after {minutes} minutes."
if proc.returncode != 0:
return f"Execution error: {stderr or 'unknown error'}"

View File

@ -30,6 +30,13 @@ def _get_blm_dir(ctx: dict | None) -> str:
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
def _get_blm_timeout(ctx: dict | None) -> int:
"""Get BLM subprocess timeout from config or default (1800s / 30 min)."""
if ctx and "config" in ctx:
return ctx["config"].timeouts.blm
return 1800
def _run_blm_command(
args: list[str], blm_dir: str, timeout: int = 1800
) -> subprocess.CompletedProcess:
@ -446,10 +453,11 @@ def run_cora_backlinks(
cli_flags=cli_flags,
)
blm_timeout = _get_blm_timeout(ctx)
try:
ingest_result = _run_blm_command(ingest_args, blm_dir)
ingest_result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired:
error = "ingest-cora timed out after 30 minutes"
error = f"ingest-cora timed out after {blm_timeout // 60} minutes"
_set_status(ctx, "")
if clickup_task_id:
_fail_clickup_task(ctx, clickup_task_id, error)
@ -494,9 +502,9 @@ def run_cora_backlinks(
gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]
try:
gen_result = _run_blm_command(gen_args, blm_dir)
gen_result = _run_blm_command(gen_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired:
error = "generate-batch timed out after 30 minutes"
error = f"generate-batch timed out after {blm_timeout // 60} minutes"
_set_status(ctx, "")
if clickup_task_id:
_fail_clickup_task(ctx, clickup_task_id, error)
@ -584,10 +592,11 @@ def blm_ingest_cora(
cli_flags=cli_flags,
)
blm_timeout = _get_blm_timeout(ctx)
try:
result = _run_blm_command(ingest_args, blm_dir)
result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired:
return "Error: ingest-cora timed out after 30 minutes."
return f"Error: ingest-cora timed out after {blm_timeout // 60} minutes."
parsed = _parse_ingest_output(result.stdout)
@ -638,10 +647,11 @@ def blm_generate_batch(
if debug:
args.append("--debug")
blm_timeout = _get_blm_timeout(ctx)
try:
result = _run_blm_command(args, blm_dir)
result = _run_blm_command(args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired:
return "Error: generate-batch timed out after 30 minutes."
return f"Error: generate-batch timed out after {blm_timeout // 60} minutes."
parsed = _parse_generate_output(result.stdout)