"use client" import { useRef, useState, useTransition } from "react" import { format } from "date-fns" import { Observe } from "./observe" import { cn } from "@/lib/utils" import { think } from "./engine/think" import { Progress } from "./interface/progress" import { Listen } from "./listen" import { Speak } from "./speak" import { Toaster } from "@/components/ui/toaster" export default function Main() { const [_isPending, startTransition] = useTransition() const [lastImage, setLastImage] = useState("") const [lastRawObservation, setLastRawObservation] = useState("") const [isLoadingAction, setLoadingAction] = useState(false) const [action, setAction] = useState("Nothing to say yet.") const lastEvent = useRef("") const handleOnEvent = (event: string, needAnswer: boolean) => { lastEvent.current = event setLoadingAction(true) startTransition(async () => { try { const action = await think(event, needAnswer) // here what could happen is that we received a message more recent than what the LLM is currently working on // when that happen, the best is to just interrupt the LLM (well.. in our case, it means ignore what it says) const canSetAction = action && lastEvent.current === event if (canSetAction) { setAction(action) } } catch (err) { console.error(err) } finally { setLoadingAction(false) } }) } // receive a new observation from what the agent is looking at const handleOnObserve = (observation: string, image: string) => { setLastRawObservation(observation) setLastImage(image) if (!observation) { return } // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are seeing this: ${observation}`) handleOnEvent(`(looking at at ${observation})`, false) } const handleOnListen = (recording: string) => { if (!recording || recording === "[BLANK_AUDIO]") { return } // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are hearing this: ${recording}`) handleOnEvent(`${recording}`, true) } return (
    <main className={cn("flex flex-col items-center p-4 space-y-4")}>
      {/* the original markup was stripped during extraction; the wrapper,
          class names, and component props below are reconstructions */}
      {lastImage ? (
        <img src={lastImage} alt="screenshot" />
      ) : null}

      <p>{lastRawObservation}</p>

      {/* show an indicator while the LLM is thinking (this usage of <Progress /> is assumed) */}
      {isLoadingAction ? <Progress /> : null}

      {/* display the latest reply and speak it out loud
          (passing the text as children to <Speak /> is an assumption) */}
      <p>{action}</p>
      <Speak>{action}</Speak>

      {/* webcam observation and microphone input
          (the onObserve / onListen prop names are assumptions) */}
      <Observe onObserve={handleOnObserve} />
      <Listen onListen={handleOnListen} />

      <p>
        🅿️ A multimodal demo to make Llama-2 hear, see and talk. You need a
        laptop computer with a modern browser supporting WebGPU. Vision is
        handled by IDEFICS.
      </p>

      <p>
        ⛔️ Limitations: This demo is provided as-is, for demonstration and
        research purposes only. As it demonstrates WebGPU technology, this
        demo will not work on incompatible browsers and/or devices. There is
        no guarantee of factually correct results; in some cases, the models
        may return hallucinated or inappropriate responses.
      </p>

      <Toaster />
    </main>
  )
}
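
// For reference, the contract this component assumes from "./engine/think":
// `think` takes the event text plus a `needAnswer` flag and resolves to the
// model's reply (falsy when there is nothing to say). The sketch below is
// inferred from the call site above, not from the real module, and the
// server-action form is an assumption:
//
//   "use server"
//
//   export async function think(event: string, needAnswer: boolean): Promise<string> {
//     // 1. append `event` to the running conversation history
//     // 2. query the LLM (only asking for a spoken reply when `needAnswer` is true)
//     // 3. return the reply text, or "" if the model stays silent
//   }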