CheddahBot/cheddahbot/tools/data_proc.py

88 lines
2.8 KiB
Python

"""Data processing tools: CSV/JSON operations."""
from __future__ import annotations
import csv
import io
import json
from pathlib import Path
from . import tool
@tool("read_csv", "Read a CSV file and return summary or specific rows", category="data")
def read_csv(path: str, max_rows: int = 20) -> str:
    """Render the first rows of a CSV file as a pipe-delimited text table.

    The first record is treated as the header. Data cells are clipped to
    50 characters each.

    Args:
        path: Location of the CSV file; resolved to an absolute path.
        max_rows: Maximum number of data rows (after the header) to show.

    Returns:
        The table text, followed by a summary line when the file holds more
        records than were shown. Problems are reported as a message string
        ("File not found: ...", "Empty CSV file.", "Error reading CSV: ...")
        instead of being raised.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"File not found: {path}"

    # Header plus up to max_rows data rows; the header is always kept.
    keep = max(max_rows, 0) + 1
    rows = []
    total_row_count = 0
    try:
        # Single pass: collect the preview rows but keep counting to the end
        # so the summary covers the whole file. Counting parsed records
        # (not physical lines) stays correct for quoted fields that contain
        # newlines, and the with-block guarantees the handle is closed.
        with open(p, newline="", encoding="utf-8-sig") as f:
            for row in csv.reader(f):
                if len(rows) < keep:
                    rows.append(row)
                total_row_count += 1
    except Exception as e:
        return f"Error reading CSV: {e}"

    if not rows:
        return "Empty CSV file."

    # Format as table
    header = rows[0]
    lines = [" | ".join(header), " | ".join("---" for _ in header)]
    lines.extend(" | ".join(str(c)[:50] for c in row) for row in rows[1:])
    result = "\n".join(lines)

    if total_row_count > max_rows + 1:
        result += f"\n\n... ({total_row_count - 1} total rows, showing first {max_rows})"
    return result
@tool("read_json", "Read and pretty-print a JSON file", category="data")
def read_json(path: str) -> str:
    """Load a JSON file and return it pretty-printed with 2-space indents.

    Args:
        path: Location of the JSON file; resolved to an absolute path.

    Returns:
        The formatted JSON text, cut off at 15000 characters with a
        "... (truncated)" marker when longer. Problems are reported as a
        message string ("File not found: ..." or "Error reading JSON: ...")
        instead of being raised.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"File not found: {path}"
    try:
        # utf-8-sig drops a leading byte-order mark if present; json.loads
        # rejects text that still starts with one. Plain UTF-8 is unaffected.
        data = json.loads(p.read_text(encoding="utf-8-sig"))
        formatted = json.dumps(data, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error reading JSON: {e}"

    if len(formatted) > 15000:
        formatted = formatted[:15000] + "\n... (truncated)"
    return formatted
@tool("query_json", "Extract data from a JSON file using a dot-notation path", category="data")
def query_json(path: str, json_path: str) -> str:
    """Extract part of a JSON file addressed by a dot-separated path.

    json_path example: 'data.users.0.name' or 'results.*.id'

    Args:
        path: Location of the JSON file; resolved to an absolute path.
        json_path: Dot-separated segments, split on "." and handed to
            ``_navigate`` for resolution.

    Returns:
        The selected value: returned as-is when it is a string, otherwise
        serialized as indented JSON. Problems are reported as a message
        string ("File not found: ..." or "Error: ...") instead of being
        raised.
    """
    p = Path(path).resolve()
    if not p.exists():
        return f"File not found: {path}"
    try:
        # utf-8-sig drops a leading byte-order mark if present; json.loads
        # rejects text that still starts with one. Plain UTF-8 is unaffected.
        data = json.loads(p.read_text(encoding="utf-8-sig"))
        result = _navigate(data, json_path.split("."))
        if isinstance(result, str):
            return result
        return json.dumps(result, indent=2, ensure_ascii=False)
    except Exception as e:
        return f"Error: {e}"
def _navigate(data, parts: list[str]):
    """Walk nested dicts/lists following the given path segments.

    Each segment is applied to the current value:

    * ``*`` on a list fans out: the remaining segments are applied to every
      element and the list of results is returned (the list itself when
      ``*`` is the last segment).
    * on a dict, the segment is looked up as a key;
    * on a list, the segment is converted to an integer index.

    Args:
        data: Parsed JSON value to start from.
        parts: Path segments in order.

    Returns:
        The value reached, or a human-readable message string when a key is
        missing, an index is invalid, or a scalar is reached before the
        path is exhausted.
    """
    for pos, part in enumerate(parts):
        if part == "*" and isinstance(data, list):
            remaining = parts[pos + 1:]
            if not remaining:
                return data
            # Apply the rest of the path to each element instead of
            # silently discarding it.
            return [_navigate(item, remaining) for item in data]
        if isinstance(data, dict):
            if part not in data:
                # Stop here so the caller sees which key was missing rather
                # than a follow-on error about navigating into a string.
                return f"Key '{part}' not found"
            data = data[part]
        elif isinstance(data, list):
            try:
                data = data[int(part)]
            except (ValueError, IndexError):
                return f"Invalid index '{part}'"
        else:
            return f"Cannot navigate into {type(data).__name__}"
    return data