"use client"

import { useRef, useState, useTransition } from "react"
import { format } from "date-fns"
import { Observe } from "./observe"
import { cn } from "@/lib/utils"
import { think } from "./engine/think"
import { Progress } from "./interface/progress"
import { Listen } from "./listen"
import { Speak } from "./speak"
import { Toaster } from "@/components/ui/toaster"
/**
 * Main client component of the multimodal demo.
 *
 * Wires together the vision (<Observe/>), hearing (<Listen/>) and speech
 * (<Speak/>) sub-components: every new observation or audio transcript is
 * forwarded to the LLM through `think()`, and the latest screenshot,
 * observation text and spoken answer are rendered. Stale LLM answers
 * (superseded by a more recent event) are discarded.
 */
export default function Main() {
  const [_isPending, startTransition] = useTransition()
  const [lastImage, setLastImage] = useState<string>("")
  const [lastRawObservation, setLastRawObservation] = useState<string>("")
  const [isLoadingAction, setLoadingAction] = useState(false)
  const [action, setAction] = useState<string>("Nothing to say yet.")

  // most recent event submitted to the LLM; used to detect and discard
  // answers that arrive for an event we have already moved past
  const lastEvent = useRef("")

  // Send an event to the LLM. `needAnswer` tells the engine whether a
  // spoken reply is expected (true for audio input, false for passive vision).
  const handleOnEvent = (event: string, needAnswer: boolean) => {
    lastEvent.current = event
    setLoadingAction(true)
    startTransition(async () => {
      try {
        // renamed from `action` to avoid shadowing the `action` state variable
        const newAction = await think(event, needAnswer)
        // here what could happen is that we received a message more recent than what the LLM is currently working on
        // when that happen, the best is to just interrupt the LLM (well.. in our case, it means ignore what it says)
        if (newAction && lastEvent.current === event) {
          setAction(newAction)
        }
      } catch (err) {
        console.error(err)
      } finally {
        // only clear the progress indicator if no newer event superseded this
        // one — otherwise a stale request finishing late would hide the
        // progress bar while the newer request is still in flight
        if (lastEvent.current === event) {
          setLoadingAction(false)
        }
      }
    })
  }

  // receive a new observation from what the agent is looking at
  const handleOnObserve = (observation: string, image: string) => {
    setLastRawObservation(observation)
    setLastImage(image)
    if (!observation) { return }
    // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are seeing this: ${observation}`)
    // note: fixed duplicated "at at" in the prompt string
    handleOnEvent(`(looking at ${observation})`, false)
  }

  // receive a new audio transcript; "[BLANK_AUDIO]" is the transcriber's
  // sentinel for silence and is ignored
  const handleOnListen = (recording: string) => {
    if (!recording || recording === "[BLANK_AUDIO]") { return }
    // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are hearing this: ${recording}`)
    handleOnEvent(`${recording}`, true)
  }

  return (
    <div className="w-screen h-screen bg-zinc-100">
      <div className="fixed z-10 left-0 right-0 flex flex-col items-center justify-center">
        <div className={cn(
          `flex flex-col md:flex-row`,
          `items-center justify-between`,
          `w-full md:w-[90%] lg:w-[80%]`,
          `p-2 mt-0 md:p-4 md:mt-8`,
          `bg-zinc-100 md:rounded-xl`,
          `shadow-2xl text-xs md:text-sm`
        )}>
          <div className="flex flex-row space-x-4 w-full md:w-1/2 p-2 md:p-4">
            <div className="flex w-[112px]">
              {lastImage ?
                <div className="w-28 aspect-video">
                  <img
                    src={lastImage}
                    alt="screenshot"
                    className="rounded-lg shadow-xl border border-zinc-500"
                  />
                </div> : null}
            </div>
            <div className="text-lg flex-grow italic">
              <span className="text-zinc-700 text-lg">
                {lastRawObservation}
              </span>
            </div>
          </div>
          <div className="flex flex-row w-full md:w-1/2 p-2 md:p-4">
            <div className="w-full text-zinc-800 text-lg">
              {action}
            </div>
          </div>
        </div>
      </div>
      <Observe onObserve={handleOnObserve} />
      <Listen onListen={handleOnListen} />
      <Speak>{action}</Speak>
      <Toaster />
      <Progress
        isLoading={isLoadingAction}
        resetKey=""
        className="left-6 right-0"
      />
      <div className="fixed z-10 left-0 right-0 bottom-0 flex flex-col items-center justify-center">
        <div className="full md:w-[80%] lg:w-[70%] mb-0 md:p-4 md:mb-8 bg-zinc-100 md:rounded-xl p-4 shadow-2xl text-xs md:text-sm">
          <p>🅿️ <span className="font-semibold">
          </span>A multimodal demo to make
            <a href="https://huggingface.co/meta-llama" target="_blank" className="font-semibold underline"> Llama-2 </a> hear, see and talk.
            You need a laptop computer with <a href="https://caniuse.com/webgpu" target="_blank" className="font-semibold underline">a modern browser supporting WebGPU</a>.
            Vision is handled by <a href="https://huggingface.co/HuggingFaceM4/idefics-80b#bias-evaluation" target="_blank" className="font-semibold underline"> IDEFICS </a></p>
          <p>⛔️ <span className="font-semibold">Limitations: </span>This demo is provided as-is, for demonstration and research purpose only. As it demonstrates WebGPU technology, this demo will not support incompatible browsers and/or devices. No guarantee of factually correct results. In some cases, the models may return hallucinated or inappropriate responses.</p>
        </div>
      </div>
    </div>
  )
}