CheddahBot/cheddahbot/ui.py

"""Gradio interface for CheddahBot."""

from __future__ import annotations

import logging
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING

import gradio as gr

if TYPE_CHECKING:
    from .agent import Agent
    from .config import Config
    from .llm import LLMAdapter

log = logging.getLogger(__name__)

_CSS = """
.contain { max-width: 900px; margin: auto; }
footer { display: none !important; }
"""


def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> gr.Blocks:
    """Build and return the Gradio app."""

    available_models = llm.list_available_models()
    model_choices = [(m.name, m.id) for m in available_models]
    current_model = llm.current_model

    with gr.Blocks(title="CheddahBot") as app:
        gr.Markdown("# CheddahBot", elem_classes=["contain"])

        with gr.Row(elem_classes=["contain"]):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=current_model,
                label="Model",
                interactive=True,
                scale=3,
            )
            refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
            new_chat_btn = gr.Button("New Chat", scale=1, variant="secondary")

        chatbot = gr.Chatbot(
            label="Chat",
            height=500,
            buttons=["copy"],
            elem_classes=["contain"],
        )

        with gr.Row(elem_classes=["contain"]):
            msg_input = gr.MultimodalTextbox(
                placeholder="Type a message... (attach files, use mic, or camera)",
                show_label=False,
                scale=4,
                sources=["upload", "microphone"],
            )

        # -- Voice Chat Mode --

        with gr.Accordion("Voice Chat", open=False, elem_classes=["contain"]):
            gr.Markdown("Record audio and get a spoken response.")
            voice_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
            voice_output = gr.Audio(type="filepath", label="Response", autoplay=True)
            voice_status = gr.Textbox(label="Transcript", interactive=False)

        # -- Accordion sections --

        with gr.Accordion("Conversation History", open=False, elem_classes=["contain"]):
            conv_list = gr.Dataframe(
                headers=["ID", "Title", "Last Updated"],
                label="Past Conversations",
                interactive=False,
            )
            load_conv_btn = gr.Button("Load Selected")

        with gr.Accordion("Settings", open=False, elem_classes=["contain"]):
            gr.Markdown(
                "Edit `identity/SOUL.md` to change the agent's personality.\n\n"
                "Edit `identity/USER.md` to update your profile.\n\n"
                "Edit `config.yaml` for advanced settings."
            )

        # -- Event handlers --

        def on_model_change(model_id):
            llm.switch_model(model_id)
            return f"Switched to {model_id}"

        def on_refresh_models():
            models = llm.list_available_models()
            choices = [(m.name, m.id) for m in models]
            return gr.update(choices=choices, value=llm.current_model)

        def on_new_chat():
            agent.new_conversation()
            return [], _load_conversations()

        def _load_conversations():
            convs = agent.db.list_conversations()
            return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]

        def on_user_message(message, chat_history):
            # Extract text and files from MultimodalTextbox
            if isinstance(message, dict):
                text = message.get("text", "")
                files = message.get("files", [])
            else:
                text = str(message)
                files = []

            if not text and not files:
                yield chat_history, gr.update(value=None)
                return

            # Handle audio files - transcribe them
            processed_files = []
            for f in files:
                fpath = f if isinstance(f, str) else f.get("path", f.get("name", ""))
                if fpath and Path(fpath).suffix.lower() in (".wav", ".mp3", ".ogg", ".webm", ".m4a"):
                    try:
                        from .media import transcribe_audio
                        transcript = transcribe_audio(fpath)
                        if transcript:
                            text = f"{text}\n[Voice message]: {transcript}" if text else f"[Voice message]: {transcript}"
                        continue
                    except Exception as e:
                        log.warning("Audio transcription failed: %s", e)
                processed_files.append(fpath)

            # Add user message
            user_display = text
            if processed_files:
                file_names = [Path(f).name for f in processed_files]
                user_display += f"\n[Attached: {', '.join(file_names)}]"

            chat_history = chat_history + [{"role": "user", "content": user_display}]
            yield chat_history, gr.update(value=None)

            # Stream assistant response
            response_text = ""
            chat_history = chat_history + [{"role": "assistant", "content": ""}]

            for chunk in agent.respond(text, files=processed_files):
                response_text += chunk
                chat_history[-1] = {"role": "assistant", "content": response_text}
                yield chat_history, gr.update(value=None)

        def on_voice_chat(audio_path):
            """Handle voice chat: transcribe -> respond -> TTS."""
            if not audio_path:
                return None, "No audio received."
            try:
                from .media import transcribe_audio, text_to_speech

                # Transcribe
                transcript = transcribe_audio(audio_path)
                if not transcript:
                    return None, "Could not transcribe audio."

                # Get response
                response = agent.respond_to_prompt(transcript)

                # TTS
                output_path = config.data_dir / "generated" / "voice_response.mp3"
                text_to_speech(response, output_path)

                return str(output_path), f"You said: {transcript}\n\nResponse: {response}"
            except Exception as e:
                return None, f"Voice chat error: {e}"

        # -- Wire events --

        model_dropdown.change(on_model_change, [model_dropdown], None)
        refresh_btn.click(on_refresh_models, None, [model_dropdown])
        new_chat_btn.click(on_new_chat, None, [chatbot, conv_list])

        msg_input.submit(
            on_user_message,
            [msg_input, chatbot],
            [chatbot, msg_input],
        )

        voice_input.stop_recording(
            on_voice_chat,
            [voice_input],
            [voice_output, voice_status],
        )

        # Load conversation list on app start
        app.load(_load_conversations, None, [conv_list])

    return app, _CSS