"""Data processing tools: CSV/JSON operations.""" from __future__ import annotations import csv import io import json from pathlib import Path from . import tool @tool("read_csv", "Read a CSV file and return summary or specific rows", category="data") def read_csv(path: str, max_rows: int = 20) -> str: p = Path(path).resolve() if not p.exists(): return f"File not found: {path}" try: with open(p, newline="", encoding="utf-8-sig") as f: reader = csv.reader(f) rows = [] for i, row in enumerate(reader): rows.append(row) if i >= max_rows: break if not rows: return "Empty CSV file." # Format as table header = rows[0] lines = [" | ".join(header), " | ".join("---" for _ in header)] for row in rows[1:]: lines.append(" | ".join(str(c)[:50] for c in row)) result = "\n".join(lines) total_line_count = sum(1 for _ in open(p, encoding="utf-8-sig")) if total_line_count > max_rows + 1: result += f"\n\n... ({total_line_count - 1} total rows, showing first {max_rows})" return result except Exception as e: return f"Error reading CSV: {e}" @tool("read_json", "Read and pretty-print a JSON file", category="data") def read_json(path: str) -> str: p = Path(path).resolve() if not p.exists(): return f"File not found: {path}" try: data = json.loads(p.read_text(encoding="utf-8")) formatted = json.dumps(data, indent=2, ensure_ascii=False) if len(formatted) > 15000: formatted = formatted[:15000] + "\n... (truncated)" return formatted except Exception as e: return f"Error reading JSON: {e}" @tool("query_json", "Extract data from a JSON file using a dot-notation path", category="data") def query_json(path: str, json_path: str) -> str: """json_path example: 'data.users.0.name' or 'results.*.id'""" p = Path(path).resolve() if not p.exists(): return f"File not found: {path}" try: data = json.loads(p.read_text(encoding="utf-8")) result = _navigate(data, json_path.split(".")) return json.dumps(result, indent=2, ensure_ascii=False) if not isinstance(result, str) else result except Exception as e: return f"Error: {e}" def _navigate(data, parts: list[str]): for part in parts: if part == "*" and isinstance(data, list): return data elif isinstance(data, dict): data = data.get(part, f"Key '{part}' not found") elif isinstance(data, list): try: data = data[int(part)] except (ValueError, IndexError): return f"Invalid index '{part}'" else: return f"Cannot navigate into {type(data).__name__}" return data