Add configurable per-task timeouts (env vars, config.yaml, defaults)
Bump the default execution brain timeout from 15 min to 45 min to avoid timeouts during content writing. The BLM timeout stays at 30 min. Both are configurable via the CHEDDAH_TIMEOUT_EXECUTION_BRAIN / CHEDDAH_TIMEOUT_BLM env vars or the `timeouts` section of config.yaml.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
master
parent
bdb4c9490a
commit
6e7e2b2320
|
|
@ -382,7 +382,10 @@ class Agent:
|
||||||
skip_permissions: If True, run CLI with --dangerously-skip-permissions.
|
skip_permissions: If True, run CLI with --dangerously-skip-permissions.
|
||||||
"""
|
"""
|
||||||
log.info("Execution brain task: %s", prompt[:100])
|
log.info("Execution brain task: %s", prompt[:100])
|
||||||
kwargs: dict = {"system_prompt": system_context}
|
kwargs: dict = {
|
||||||
|
"system_prompt": system_context,
|
||||||
|
"timeout": self.config.timeouts.execution_brain,
|
||||||
|
}
|
||||||
if tools:
|
if tools:
|
||||||
kwargs["tools"] = tools
|
kwargs["tools"] = tools
|
||||||
if model:
|
if model:
|
||||||
|
|
|
||||||
|
|
@ -98,6 +98,12 @@ class ApiBudgetConfig:
|
||||||
alert_threshold: float = 0.8 # alert at 80% of limit
|
alert_threshold: float = 0.8 # alert at 80% of limit
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TimeoutConfig:
    """Per-task subprocess timeouts, expressed in seconds.

    The defaults below can be overridden by the ``timeouts`` section of
    config.yaml and by the ``CHEDDAH_TIMEOUT_EXECUTION_BRAIN`` /
    ``CHEDDAH_TIMEOUT_BLM`` environment variables.
    """

    # Max wait for the execution brain (Claude Code CLI) to finish.
    execution_brain: int = 2700  # 45 minutes
    # Max wait for a BLM subprocess command to finish.
    blm: int = 1800  # 30 minutes
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class ContentConfig:
|
class ContentConfig:
|
||||||
cora_inbox: str = "" # e.g. "Z:/content-cora-inbox"
|
cora_inbox: str = "" # e.g. "Z:/content-cora-inbox"
|
||||||
|
|
@ -157,6 +163,7 @@ class Config:
|
||||||
autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig)
|
autocora: AutoCoraConfig = field(default_factory=AutoCoraConfig)
|
||||||
api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig)
|
api_budget: ApiBudgetConfig = field(default_factory=ApiBudgetConfig)
|
||||||
content: ContentConfig = field(default_factory=ContentConfig)
|
content: ContentConfig = field(default_factory=ContentConfig)
|
||||||
|
timeouts: TimeoutConfig = field(default_factory=TimeoutConfig)
|
||||||
ntfy: NtfyConfig = field(default_factory=NtfyConfig)
|
ntfy: NtfyConfig = field(default_factory=NtfyConfig)
|
||||||
agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()])
|
agents: list[AgentConfig] = field(default_factory=lambda: [AgentConfig()])
|
||||||
|
|
||||||
|
|
@ -221,6 +228,10 @@ def load_config() -> Config:
|
||||||
for k, v in data["content"].items():
|
for k, v in data["content"].items():
|
||||||
if hasattr(cfg.content, k):
|
if hasattr(cfg.content, k):
|
||||||
setattr(cfg.content, k, v)
|
setattr(cfg.content, k, v)
|
||||||
|
if "timeouts" in data and isinstance(data["timeouts"], dict):
|
||||||
|
for k, v in data["timeouts"].items():
|
||||||
|
if hasattr(cfg.timeouts, k):
|
||||||
|
setattr(cfg.timeouts, k, int(v))
|
||||||
|
|
||||||
# ntfy push notifications
|
# ntfy push notifications
|
||||||
if "ntfy" in data and isinstance(data["ntfy"], dict):
|
if "ntfy" in data and isinstance(data["ntfy"], dict):
|
||||||
|
|
@ -288,6 +299,12 @@ def load_config() -> Config:
|
||||||
if blm_dir := os.getenv("BLM_DIR"):
|
if blm_dir := os.getenv("BLM_DIR"):
|
||||||
cfg.link_building.blm_dir = blm_dir
|
cfg.link_building.blm_dir = blm_dir
|
||||||
|
|
||||||
|
# Timeout env var overrides (seconds)
|
||||||
|
if t := os.getenv("CHEDDAH_TIMEOUT_EXECUTION_BRAIN"):
|
||||||
|
cfg.timeouts.execution_brain = int(t)
|
||||||
|
if t := os.getenv("CHEDDAH_TIMEOUT_BLM"):
|
||||||
|
cfg.timeouts.blm = int(t)
|
||||||
|
|
||||||
# Ensure data directories exist
|
# Ensure data directories exist
|
||||||
cfg.data_dir.mkdir(parents=True, exist_ok=True)
|
cfg.data_dir.mkdir(parents=True, exist_ok=True)
|
||||||
(cfg.data_dir / "uploads").mkdir(exist_ok=True)
|
(cfg.data_dir / "uploads").mkdir(exist_ok=True)
|
||||||
|
|
|
||||||
|
|
@ -157,6 +157,7 @@ class LLMAdapter:
|
||||||
tools: str = "Bash,Read,Edit,Write,Glob,Grep",
|
tools: str = "Bash,Read,Edit,Write,Glob,Grep",
|
||||||
model: str | None = None,
|
model: str | None = None,
|
||||||
skip_permissions: bool = False,
|
skip_permissions: bool = False,
|
||||||
|
timeout: int = 2700,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Execution brain: calls Claude Code CLI with full tool access.
|
"""Execution brain: calls Claude Code CLI with full tool access.
|
||||||
|
|
||||||
|
|
@ -167,6 +168,7 @@ class LLMAdapter:
|
||||||
tools: Comma-separated Claude Code tool names (default: standard set).
|
tools: Comma-separated Claude Code tool names (default: standard set).
|
||||||
model: Override the CLI model (e.g. "claude-sonnet-4.5").
|
model: Override the CLI model (e.g. "claude-sonnet-4.5").
|
||||||
skip_permissions: If True, append --dangerously-skip-permissions to
|
skip_permissions: If True, append --dangerously-skip-permissions to
|
||||||
|
timeout: Max seconds to wait for CLI completion (default: 2700 / 45 min).
|
||||||
the CLI invocation (used for automated pipelines).
|
the CLI invocation (used for automated pipelines).
|
||||||
"""
|
"""
|
||||||
claude_bin = shutil.which("claude")
|
claude_bin = shutil.which("claude")
|
||||||
|
|
@ -218,10 +220,11 @@ class LLMAdapter:
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
stdout, stderr = proc.communicate(input=prompt, timeout=900)
|
stdout, stderr = proc.communicate(input=prompt, timeout=timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
proc.kill()
|
proc.kill()
|
||||||
return "Error: Claude Code execution timed out after 15 minutes."
|
minutes = timeout // 60
|
||||||
|
return f"Error: Claude Code execution timed out after {minutes} minutes."
|
||||||
|
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
return f"Execution error: {stderr or 'unknown error'}"
|
return f"Execution error: {stderr or 'unknown error'}"
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,13 @@ def _get_blm_dir(ctx: dict | None) -> str:
|
||||||
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
|
return os.getenv("BLM_DIR", "E:/dev/Big-Link-Man")
|
||||||
|
|
||||||
|
|
||||||
|
def _get_blm_timeout(ctx: dict | None) -> int:
|
||||||
|
"""Get BLM subprocess timeout from config or default (1800s / 30 min)."""
|
||||||
|
if ctx and "config" in ctx:
|
||||||
|
return ctx["config"].timeouts.blm
|
||||||
|
return 1800
|
||||||
|
|
||||||
|
|
||||||
def _run_blm_command(
|
def _run_blm_command(
|
||||||
args: list[str], blm_dir: str, timeout: int = 1800
|
args: list[str], blm_dir: str, timeout: int = 1800
|
||||||
) -> subprocess.CompletedProcess:
|
) -> subprocess.CompletedProcess:
|
||||||
|
|
@ -446,10 +453,11 @@ def run_cora_backlinks(
|
||||||
cli_flags=cli_flags,
|
cli_flags=cli_flags,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
blm_timeout = _get_blm_timeout(ctx)
|
||||||
try:
|
try:
|
||||||
ingest_result = _run_blm_command(ingest_args, blm_dir)
|
ingest_result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
error = "ingest-cora timed out after 30 minutes"
|
error = f"ingest-cora timed out after {blm_timeout // 60} minutes"
|
||||||
_set_status(ctx, "")
|
_set_status(ctx, "")
|
||||||
if clickup_task_id:
|
if clickup_task_id:
|
||||||
_fail_clickup_task(ctx, clickup_task_id, error)
|
_fail_clickup_task(ctx, clickup_task_id, error)
|
||||||
|
|
@ -494,9 +502,9 @@ def run_cora_backlinks(
|
||||||
gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]
|
gen_args = ["generate-batch", "-j", str(job_path), "--continue-on-error"]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
gen_result = _run_blm_command(gen_args, blm_dir)
|
gen_result = _run_blm_command(gen_args, blm_dir, timeout=blm_timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
error = "generate-batch timed out after 30 minutes"
|
error = f"generate-batch timed out after {blm_timeout // 60} minutes"
|
||||||
_set_status(ctx, "")
|
_set_status(ctx, "")
|
||||||
if clickup_task_id:
|
if clickup_task_id:
|
||||||
_fail_clickup_task(ctx, clickup_task_id, error)
|
_fail_clickup_task(ctx, clickup_task_id, error)
|
||||||
|
|
@ -584,10 +592,11 @@ def blm_ingest_cora(
|
||||||
cli_flags=cli_flags,
|
cli_flags=cli_flags,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
blm_timeout = _get_blm_timeout(ctx)
|
||||||
try:
|
try:
|
||||||
result = _run_blm_command(ingest_args, blm_dir)
|
result = _run_blm_command(ingest_args, blm_dir, timeout=blm_timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
return "Error: ingest-cora timed out after 30 minutes."
|
return f"Error: ingest-cora timed out after {blm_timeout // 60} minutes."
|
||||||
|
|
||||||
parsed = _parse_ingest_output(result.stdout)
|
parsed = _parse_ingest_output(result.stdout)
|
||||||
|
|
||||||
|
|
@ -638,10 +647,11 @@ def blm_generate_batch(
|
||||||
if debug:
|
if debug:
|
||||||
args.append("--debug")
|
args.append("--debug")
|
||||||
|
|
||||||
|
blm_timeout = _get_blm_timeout(ctx)
|
||||||
try:
|
try:
|
||||||
result = _run_blm_command(args, blm_dir)
|
result = _run_blm_command(args, blm_dir, timeout=blm_timeout)
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
return "Error: generate-batch timed out after 30 minutes."
|
return f"Error: generate-batch timed out after {blm_timeout // 60} minutes."
|
||||||
|
|
||||||
parsed = _parse_generate_output(result.stdout)
|
parsed = _parse_generate_output(result.stdout)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue