"""Gradio interface for CheddahBot.""" from __future__ import annotations import logging import tempfile from pathlib import Path from typing import TYPE_CHECKING import gradio as gr if TYPE_CHECKING: from .agent import Agent from .config import Config from .llm import LLMAdapter log = logging.getLogger(__name__) _CSS = """ .contain { max-width: 900px; margin: auto; } footer { display: none !important; } """ def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks: """Build and return the Gradio app.""" available_models = llm.list_chat_models() model_choices = [(m.name, m.id) for m in available_models] current_model = llm.current_model exec_status = "available" if llm.is_execution_brain_available() else "unavailable" with gr.Blocks(title="CheddahBot") as app: gr.Markdown("# CheddahBot", elem_classes=["contain"]) gr.Markdown( f"*Chat Brain:* `{current_model}`  |  " f"*Execution Brain (Claude Code CLI):* `{exec_status}`", elem_classes=["contain"], ) with gr.Row(elem_classes=["contain"]): model_dropdown = gr.Dropdown( choices=model_choices, value=current_model, label="Model", interactive=True, allow_custom_value=True, scale=3, ) refresh_btn = gr.Button("Refresh", scale=0, min_width=70) new_chat_btn = gr.Button("New Chat", scale=1, variant="secondary") chatbot = gr.Chatbot( label="Chat", height=500, buttons=["copy"], elem_classes=["contain"], ) with gr.Row(elem_classes=["contain"]): msg_input = gr.MultimodalTextbox( placeholder="Type a message... (attach files, use mic, or camera)", show_label=False, scale=4, sources=["upload", "microphone"], ) # -- Voice Chat Mode -- with gr.Accordion("Voice Chat", open=False, elem_classes=["contain"]): gr.Markdown("Record audio and get a spoken response.") voice_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak") voice_output = gr.Audio(type="filepath", label="Response", autoplay=True) voice_status = gr.Textbox(label="Transcript", interactive=False) # -- Accordion sections -- with gr.Accordion("Conversation History", open=False, elem_classes=["contain"]): conv_list = gr.Dataframe( headers=["ID", "Title", "Last Updated"], label="Past Conversations", interactive=False, ) load_conv_btn = gr.Button("Load Selected") with gr.Accordion("Settings", open=False, elem_classes=["contain"]): gr.Markdown( "Edit `identity/SOUL.md` to change the agent's personality.\n\n" "Edit `identity/USER.md` to update your profile.\n\n" "Edit `config.yaml` for advanced settings." ) # -- Event handlers -- def on_model_change(model_id): llm.switch_model(model_id) return f"Switched to {model_id}" def on_refresh_models(): models = llm.list_chat_models() choices = [(m.name, m.id) for m in models] return gr.update(choices=choices, value=llm.current_model) def on_new_chat(): agent.new_conversation() return [], _load_conversations() def _load_conversations(): convs = agent.db.list_conversations() return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs] def on_user_message(message, chat_history): chat_history = chat_history or [] # Extract text and files from MultimodalTextbox if isinstance(message, dict): text = message.get("text", "") files = message.get("files", []) else: text = str(message) files = [] if not text and not files: yield chat_history, gr.update(value=None) return # Handle audio files - transcribe them processed_files = [] for f in files: fpath = f if isinstance(f, str) else f.get("path", f.get("name", "")) if fpath and Path(fpath).suffix.lower() in (".wav", ".mp3", ".ogg", ".webm", ".m4a"): try: from .media import transcribe_audio transcript = transcribe_audio(fpath) if transcript: text = f"{text}\n[Voice message]: {transcript}" if text else f"[Voice message]: {transcript}" continue except Exception as e: log.warning("Audio transcription failed: %s", e) processed_files.append(fpath) # Add user message user_display = text if processed_files: file_names = [Path(f).name for f in processed_files] user_display += f"\n[Attached: {', '.join(file_names)}]" chat_history = chat_history + [{"role": "user", "content": user_display}] yield chat_history, gr.update(value=None) # Stream assistant response try: response_text = "" chat_history = chat_history + [{"role": "assistant", "content": ""}] for chunk in agent.respond(text, files=processed_files): response_text += chunk chat_history[-1] = {"role": "assistant", "content": response_text} yield chat_history, gr.update(value=None) # If no response came through, show a fallback if not response_text: chat_history[-1] = {"role": "assistant", "content": "(No response received from model)"} yield chat_history, gr.update(value=None) except Exception as e: log.error("Error in agent.respond: %s", e, exc_info=True) chat_history = chat_history + [{"role": "assistant", "content": f"Error: {e}"}] yield chat_history, gr.update(value=None) def on_voice_chat(audio_path): """Handle voice chat: transcribe -> respond -> TTS.""" if not audio_path: return None, "No audio received." try: from .media import transcribe_audio, text_to_speech # Transcribe transcript = transcribe_audio(audio_path) if not transcript: return None, "Could not transcribe audio." # Get response response = agent.respond_to_prompt(transcript) # TTS output_path = config.data_dir / "generated" / "voice_response.mp3" text_to_speech(response, output_path) return str(output_path), f"You said: {transcript}\n\nResponse: {response}" except Exception as e: return None, f"Voice chat error: {e}" # -- Wire events -- model_dropdown.change(on_model_change, [model_dropdown], None) refresh_btn.click(on_refresh_models, None, [model_dropdown]) new_chat_btn.click(on_new_chat, None, [chatbot, conv_list]) msg_input.submit( on_user_message, [msg_input, chatbot], [chatbot, msg_input], ) voice_input.stop_recording( on_voice_chat, [voice_input], [voice_output, voice_status], ) # Load conversation list on app start app.load(_load_conversations, None, [conv_list]) return app, _CSS