# CheddahBot/cheddahbot/ui.py
"""Gradio interface for CheddahBot."""
from __future__ import annotations
import logging
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING
import gradio as gr
if TYPE_CHECKING:
from .agent import Agent
from .config import Config
from .llm import LLMAdapter
log = logging.getLogger(__name__)
_CSS = """
.contain { max-width: 900px; margin: auto; }
footer { display: none !important; }
"""
def create_ui(agent: Agent, config: Config, llm: LLMAdapter) -> tuple[gr.Blocks, str]:
    """Build and return the Gradio app.

    Args:
        agent: Conversation agent; used to stream replies (``respond``),
            answer one-shot prompts (``respond_to_prompt``), start new chats,
            and list past conversations via ``agent.db``.
        config: App configuration; ``config.data_dir`` is where generated
            media (TTS output) is written.
        llm: Model adapter; lists available models, switches the active one,
            and exposes ``current_model``.

    Returns:
        ``(app, css)`` — the ``gr.Blocks`` app and the CSS string the caller
        should apply when launching (``_CSS`` is not passed to ``gr.Blocks``
        here).
    """
    available_models = llm.list_available_models()
    # Dropdown choices are (display name, underlying id) pairs.
    model_choices = [(m.name, m.id) for m in available_models]
    current_model = llm.current_model

    with gr.Blocks(title="CheddahBot") as app:
        gr.Markdown("# CheddahBot", elem_classes=["contain"])

        with gr.Row(elem_classes=["contain"]):
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=current_model,
                label="Model",
                interactive=True,
                scale=3,
            )
            refresh_btn = gr.Button("Refresh", scale=0, min_width=70)
            new_chat_btn = gr.Button("New Chat", scale=1, variant="secondary")

        chatbot = gr.Chatbot(
            label="Chat",
            height=500,
            type="messages",  # history entries below are {"role", "content"} dicts
            buttons=["copy"],
            elem_classes=["contain"],
        )

        with gr.Row(elem_classes=["contain"]):
            msg_input = gr.MultimodalTextbox(
                placeholder="Type a message... (attach files, use mic, or camera)",
                show_label=False,
                scale=4,
                sources=["upload", "microphone"],
            )

        # -- Voice Chat Mode --
        with gr.Accordion("Voice Chat", open=False, elem_classes=["contain"]):
            gr.Markdown("Record audio and get a spoken response.")
            voice_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak")
            voice_output = gr.Audio(type="filepath", label="Response", autoplay=True)
            voice_status = gr.Textbox(label="Transcript", interactive=False)

        # -- Accordion sections --
        with gr.Accordion("Conversation History", open=False, elem_classes=["contain"]):
            conv_list = gr.Dataframe(
                headers=["ID", "Title", "Last Updated"],
                label="Past Conversations",
                interactive=False,
            )
            # TODO(review): this button is never wired to a handler below, so
            # loading a selected conversation is currently a no-op.
            load_conv_btn = gr.Button("Load Selected")

        with gr.Accordion("Settings", open=False, elem_classes=["contain"]):
            gr.Markdown(
                "Edit `identity/SOUL.md` to change the agent's personality.\n\n"
                "Edit `identity/USER.md` to update your profile.\n\n"
                "Edit `config.yaml` for advanced settings."
            )

        # -- Event handlers --
        def on_model_change(model_id):
            """Switch the active LLM. The event is wired with outputs=None,
            so returning a value would only trigger a Gradio warning — log
            the switch instead."""
            llm.switch_model(model_id)
            log.info("Switched to %s", model_id)

        def on_refresh_models():
            """Re-query available models and refresh the dropdown in place."""
            models = llm.list_available_models()
            choices = [(m.name, m.id) for m in models]
            return gr.update(choices=choices, value=llm.current_model)

        def on_new_chat():
            """Start a fresh conversation; clear the chat and refresh history."""
            agent.new_conversation()
            return [], _load_conversations()

        def _load_conversations():
            """Return Dataframe rows; timestamp trimmed to seconds precision."""
            convs = agent.db.list_conversations()
            return [[c["id"], c["title"], c["updated_at"][:19]] for c in convs]

        def on_user_message(message, chat_history):
            """Handle a (possibly multimodal) submission; stream the reply.

            Yields ``(chat_history, msg_input_update)`` pairs so the chat
            window updates incrementally while the input box is cleared.
            """
            # MultimodalTextbox submits {"text": ..., "files": [...]}.
            if isinstance(message, dict):
                text = message.get("text", "")
                files = message.get("files", [])
            else:
                text = str(message)
                files = []
            if not text and not files:
                yield chat_history, gr.update(value=None)
                return
            # Audio attachments are transcribed into the prompt text instead
            # of being forwarded as files; other files pass through unchanged.
            processed_files = []
            for f in files:
                fpath = f if isinstance(f, str) else f.get("path", f.get("name", ""))
                if fpath and Path(fpath).suffix.lower() in (".wav", ".mp3", ".ogg", ".webm", ".m4a"):
                    try:
                        from .media import transcribe_audio
                        transcript = transcribe_audio(fpath)
                        if transcript:
                            text = f"{text}\n[Voice message]: {transcript}" if text else f"[Voice message]: {transcript}"
                            continue
                    except Exception as e:
                        # Best-effort: fall through and attach the raw audio file.
                        log.warning("Audio transcription failed: %s", e)
                processed_files.append(fpath)
            # Show the user message (with attachment names) immediately.
            user_display = text
            if processed_files:
                file_names = [Path(f).name for f in processed_files]
                user_display += f"\n[Attached: {', '.join(file_names)}]"
            chat_history = chat_history + [{"role": "user", "content": user_display}]
            yield chat_history, gr.update(value=None)
            # Stream the assistant response chunk by chunk.
            response_text = ""
            chat_history = chat_history + [{"role": "assistant", "content": ""}]
            for chunk in agent.respond(text, files=processed_files):
                response_text += chunk
                chat_history[-1] = {"role": "assistant", "content": response_text}
                yield chat_history, gr.update(value=None)

        def on_voice_chat(audio_path):
            """Handle voice chat: transcribe -> respond -> TTS."""
            if not audio_path:
                return None, "No audio received."
            try:
                from .media import transcribe_audio, text_to_speech
                # Transcribe
                transcript = transcribe_audio(audio_path)
                if not transcript:
                    return None, "Could not transcribe audio."
                # Get response
                response = agent.respond_to_prompt(transcript)
                # TTS — ensure the output directory exists before writing.
                output_path = config.data_dir / "generated" / "voice_response.mp3"
                output_path.parent.mkdir(parents=True, exist_ok=True)
                text_to_speech(response, output_path)
                return str(output_path), f"You said: {transcript}\n\nResponse: {response}"
            except Exception as e:
                return None, f"Voice chat error: {e}"

        # -- Wire events --
        model_dropdown.change(on_model_change, [model_dropdown], None)
        refresh_btn.click(on_refresh_models, None, [model_dropdown])
        new_chat_btn.click(on_new_chat, None, [chatbot, conv_list])
        msg_input.submit(
            on_user_message,
            [msg_input, chatbot],
            [chatbot, msg_input],
        )
        voice_input.stop_recording(
            on_voice_chat,
            [voice_input],
            [voice_output, voice_status],
        )
        # Populate the conversation list when the app first loads.
        app.load(_load_conversations, None, [conv_list])

    return app, _CSS