Add configurable per-task timeouts (env vars, config.yaml, defaults)

Bump the execution brain's default timeout from 15 min to 45 min to avoid
timeouts during long content-writing tasks. The BLM timeout stays at 30 min.
Both values are in seconds and can be overridden via the
CHEDDAH_TIMEOUT_EXECUTION_BRAIN / CHEDDAH_TIMEOUT_BLM env vars or the
timeouts section of config.yaml.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
PeninsulaInd 2026-03-20 12:52:04 -05:00
parent bdb4c9490a
commit 6e7e2b2320
4 changed files with 44 additions and 11 deletions

View File

@ -382,7 +382,10 @@ class Agent:
skip_permissions: If True, run CLI with --dangerously-skip-permissions. skip_permissions: If True, run CLI with --dangerously-skip-permissions.
""" """
log.info("Execution brain task: %s", prompt[:100]) log.info("Execution brain task: %s", prompt[:100])
kwargs: dict = {"system_prompt": system_context} kwargs: dict = {
"system_prompt": system_context,
"timeout": self.config.timeouts.execution_brain,
}
if tools: if tools:
kwargs["tools"] = tools kwargs["tools"] = tools
if model: if model:

View File

@ -98,6 +98,12 @@ class ApiBudgetConfig:
alert_threshold: float = 0.8 # alert at 80% of limit alert_threshold: float = 0.8 # alert at 80% of limit
@dataclass
class TimeoutConfig:
    """Per-task timeouts, expressed in seconds."""

    # Budget for an execution-brain run: 45 minutes.
    execution_brain: int = 45 * 60
    # Budget for a BLM command: 30 minutes.
    blm: int = 30 * 60
@dataclass @dataclass
class ContentConfig: class ContentConfig:
cora_inbox: str = "" # e.g. "Z:/content-cora-inbox" cora_inbox: str = "" # e.g. "Z:/content-cora-inbox"
@ -157,6 +163,7 @@ class Config:
autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig) autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig)
api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig) api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig)
content: ContentConfig = field(default_factory=ContentConfig) content: ContentConfig = field(default_factory=ContentConfig)
timeouts: TimeoutConfig = field(default_factory=TimeoutConfig)
ntfy: NtfyConfig = field(default_factory=NtfyConfig) ntfy: NtfyConfig = field(default_factory=NtfyConfig)
agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()]) agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()])
@ -221,6 +228,10 @@ def load_config() -> Config:
for k, v in data["content"].items(): for k, v in data["content"].items():
if hasattr(cfg.content, k): if hasattr(cfg.content, k):
setattr(cfg.content, k, v) setattr(cfg.content, k, v)
if "timeouts" in data and isinstance(data["timeouts"], dict):
for k, v in data["timeouts"].items():
if hasattr(cfg.timeouts, k):
setattr(cfg.timeouts, k, int(v))
# ntfy push notifications # ntfy push notifications
if "ntfy" in data and isinstance(data["ntfy"], dict): if "ntfy" in data and isinstance(data["ntfy"], dict):
@ -288,6 +299,12 @@ def load_config() -> Config:
if blm_dir := os.getenv("BLM_DIR"): if blm_dir := os.getenv("BLM_DIR"):
cfg.link_building.blm_dir = blm_dir cfg.link_building.blm_dir = blm_dir
# Timeout env var overrides (seconds)
if t := os.getenv("CHEDDAH_TIMEOUT_EXECUTION_BRAIN"):
cfg.timeouts.execution_brain = int(t)
if t := os.getenv("CHEDDAH_TIMEOUT_BLM"):
cfg.timeouts.blm = int(t)
# Ensure data directories exist # Ensure data directories exist
cfg.data_dir.mkdir(parents=True, exist_ok=True) cfg.data_dir.mkdir(parents=True, exist_ok=True)
(cfg.data_dir / "uploads").mkdir(exist_ok=True) (cfg.data_dir / "uploads").mkdir(exist_ok=True)

View File

@ -157,6 +157,7 @@ class LLMAdapter:
tools: str = "Bash,Read,Edit,Write,Glob,Grep", tools: str = "Bash,Read,Edit,Write,Glob,Grep",
model: str | None = None, model: str | None = None,
skip_permissions: bool = False, skip_permissions: bool = False,
timeout: int = 2700,
) -> str: ) -> str:
"""Execution brain: calls Claude Code CLI with full tool access. """Execution brain: calls Claude Code CLI with full tool access.
@ -167,6 +168,7 @@ class LLMAdapter:
tools: Comma-separated Claude Code tool names (default: standard set). tools: Comma-separated Claude Code tool names (default: standard set).
model: Override the CLI model (e.g. "claude-sonnet-4.5"). model: Override the CLI model (e.g. "claude-sonnet-4.5").
skip_permissions: If True, append --dangerously-skip-permissions to skip_permissions: If True, append --dangerously-skip-permissions to
the CLI invocation (used for automated pipelines). the CLI invocation (used for automated pipelines).
timeout: Max seconds to wait for CLI completion (default: 2700 / 45 min).
""" """
claude_bin = shutil.which("claude") claude_bin = shutil.which("claude")
@ -218,10 +220,11 @@ class LLMAdapter:
) )
try: try:
stdout, stderr = proc.communicate(input=prompt, timeout=900) stdout, stderr = proc.communicate(input=prompt, timeout=timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
proc.kill() proc.kill()
return "Error: Claude Code execution timed out after 15 minutes." minutes = timeout // 60
return f"Error: Claude Code execution timed out after {minutes} minutes."
if proc.returncode != 0: if proc.returncode != 0:
return f"Execution error: {stderr or 'unknown error'}" return f"Execution error: {stderr or 'unknown error'}"

View File

@ -30,6 +30,13 @@ def _get_blm_dir(ctx: dict | None) -> str:
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man") return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
def _get_blm_timeout(ctx: dict | None) -> int:
"""Get BLM subprocess timeout from config or default (1800s / 30 min)."""
if ctx and "config" in ctx:
return ctx["config"].timeouts.blm
return 1800
def _run_blm_command( def _run_blm_command(
args: list[str], blm_dir: str, timeout: int = 1800 args: list[str], blm_dir: str, timeout: int = 1800
) -> subprocess.CompletedProcess: ) -> subprocess.CompletedProcess:
@ -446,10 +453,11 @@ def run_cora_backlinks(
cli_flags=cli_flags, cli_flags=cli_flags,
) )
blm_timeout = _get_blm_timeout(ctx)
try: try:
ingest_result = _run_blm_command(ingest_args, blm_dir) ingest_result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
error = "ingest-cora timed out after 30 minutes" error = f"ingest-cora timed out after {blm_timeout // 60} minutes"
_set_status(ctx, "") _set_status(ctx, "")
if clickup_task_id: if clickup_task_id:
_fail_clickup_task(ctx, clickup_task_id, error) _fail_clickup_task(ctx, clickup_task_id, error)
@ -494,9 +502,9 @@ def run_cora_backlinks(
gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"] gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]
try: try:
gen_result = _run_blm_command(gen_args, blm_dir) gen_result = _run_blm_command(gen_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
error = "generate-batch timed out after 30 minutes" error = f"generate-batch timed out after {blm_timeout // 60} minutes"
_set_status(ctx, "") _set_status(ctx, "")
if clickup_task_id: if clickup_task_id:
_fail_clickup_task(ctx, clickup_task_id, error) _fail_clickup_task(ctx, clickup_task_id, error)
@ -584,10 +592,11 @@ def blm_ingest_cora(
cli_flags=cli_flags, cli_flags=cli_flags,
) )
blm_timeout = _get_blm_timeout(ctx)
try: try:
result = _run_blm_command(ingest_args, blm_dir) result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return "Error: ingest-cora timed out after 30 minutes." return f"Error: ingest-cora timed out after {blm_timeout // 60} minutes."
parsed = _parse_ingest_output(result.stdout) parsed = _parse_ingest_output(result.stdout)
@ -638,10 +647,11 @@ def blm_generate_batch(
if debug: if debug:
args.append("--debug") args.append("--debug")
blm_timeout = _get_blm_timeout(ctx)
try: try:
result = _run_blm_command(args, blm_dir) result = _run_blm_command(args, blm_dir, timeout=blm_timeout)
except subprocess.TimeoutExpired: except subprocess.TimeoutExpired:
return "Error: generate-batch timed out after 30 minutes." return f"Error: generate-batch timed out after {blm_timeout // 60} minutes."
parsed = _parse_generate_output(result.stdout) parsed = _parse_generate_output(result.stdout)