// observer/src/app/main.tsx
"use client"
import { useRef, useState, useTransition } from "react"
import { format } from "date-fns"
import { Observe } from "./observe"
import { cn } from "@/lib/utils"
import { think } from "./engine/think"
import { Progress } from "./interface/progress"
import { Listen } from "./listen"
import { Speak } from "./speak"
import { Toaster } from "@/components/ui/toaster"
/**
 * Root client component of the multimodal agent demo.
 *
 * Wires together vision (`Observe`), hearing (`Listen`), reasoning (`think`)
 * and speech (`Speak`): observations and transcriptions are turned into
 * "events", each event is sent to the LLM, and the resulting utterance is
 * displayed and spoken aloud.
 */
export default function Main() {
  const [_isPending, startTransition] = useTransition()

  // Last captured frame (image URL/data URL) and the raw caption produced for it.
  const [lastImage, setLastImage] = useState<string>("")
  const [lastRawObservation, setLastRawObservation] = useState<string>("")

  // True while the LLM is generating a response to the most recent event.
  const [isLoadingAction, setLoadingAction] = useState(false)

  // The agent's latest utterance; rendered on screen and passed to <Speak>.
  const [action, setAction] = useState<string>("Nothing to say yet.")

  // Most recent event, kept in a ref so the async closure below can detect
  // whether a newer event arrived while the LLM was still working.
  const lastEvent = useRef("")

  // Forward an event (something seen or heard) to the LLM.
  // `needAnswer` tells the engine whether a spoken reply is expected.
  const handleOnEvent = (event: string, needAnswer: boolean) => {
    lastEvent.current = event
    setLoadingAction(true)
    startTransition(async () => {
      try {
        // NOTE: named `reply` (not `action`) to avoid shadowing the state variable.
        const reply = await think(event, needAnswer)
        // A more recent event may have arrived while the LLM was working on
        // this one. When that happens, the best we can do is "interrupt" the
        // LLM — in our case, that simply means ignoring its stale reply.
        const canSetAction = reply && lastEvent.current === event
        if (canSetAction) {
          setAction(reply)
        }
      } catch (err) {
        console.error(err)
      } finally {
        setLoadingAction(false)
      }
    })
  }

  // Receive a new observation from what the agent is looking at.
  const handleOnObserve = (observation: string, image: string) => {
    setLastRawObservation(observation)
    setLastImage(image)
    if (!observation) { return }
    // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are seeing this: ${observation}`)
    handleOnEvent(`(looking at ${observation})`, false)
  }

  // Receive a new transcription from what the agent is hearing.
  // "[BLANK_AUDIO]" is the transcriber's marker for silence — skip it.
  const handleOnListen = (recording: string) => {
    if (!recording || recording === "[BLANK_AUDIO]") { return }
    // handleOnEvent(`It is ${format(new Date(), 'HH:mm')} and you are hearing this: ${recording}`)
    handleOnEvent(`${recording}`, true)
  }

  return (
    <div className="w-screen h-screen bg-zinc-100">
      {/* Top panel: last captured frame, its caption, and the agent's reply. */}
      <div className="fixed z-10 left-0 right-0 flex flex-col items-center justify-center">
        <div className={cn(
          `flex flex-col md:flex-row`,
          `items-center justify-between`,
          `w-full md:w-[90%] lg:w-[80%]`,
          `p-2 mt-0 md:p-4 md:mt-8`,
          `bg-zinc-100 md:rounded-xl`,
          `shadow-2xl text-xs md:text-sm`
        )}>
          <div className="flex flex-row space-x-4 w-full md:w-1/2 p-2 md:p-4">
            <div className="flex w-[112px]">
              {lastImage ?
                <div className="w-28 aspect-video">
                  <img
                    src={lastImage}
                    alt="screenshot"
                    className="rounded-lg shadow-xl border border-zinc-500"
                  />
                </div> : null}
            </div>
            <div className="text-lg flex-grow italic">
              <span className="text-zinc-700 text-lg">
                {lastRawObservation}
              </span>
            </div>
          </div>
          <div className="flex flex-row w-full md:w-1/2 p-2 md:p-4">
            <div className="w-full text-zinc-800 text-lg">
              {action}
            </div>
          </div>
        </div>
      </div>

      {/* Headless workers: capture, transcription, speech synthesis, toasts. */}
      <Observe onObserve={handleOnObserve} />
      <Listen onListen={handleOnListen} />
      <Speak>{action}</Speak>
      <Toaster />
      <Progress
        isLoading={isLoadingAction}
        resetKey=""
        className="left-6 right-0"
      />

      {/* Bottom panel: project description and limitations. */}
      <div className="fixed z-10 left-0 right-0 bottom-0 flex flex-col items-center justify-center">
        <div className="full md:w-[80%] lg:w-[70%] mb-0 md:p-4 md:mb-8 bg-zinc-100 md:rounded-xl p-4 shadow-2xl text-xs md:text-sm">
          <p>🅿️ <span className="font-semibold">
          </span>A multimodal demo to make
            <a href="https://huggingface.co/meta-llama" target="_blank" className="font-semibold underline"> Llama-2 </a> hear, see and talk.
            You need a laptop computer with <a href="https://caniuse.com/webgpu" target="_blank" className="font-semibold underline">a modern browser supporting WebGPU</a>.
            Vision is handled by <a href="https://huggingface.co/HuggingFaceM4/idefics-80b#bias-evaluation" target="_blank" className="font-semibold underline"> IDEFICS </a></p>
          <p>⛔️ <span className="font-semibold">Limitations: </span>This demo is provided as-is, for demonstration and research purpose only. As it demonstrates WebGPU technology, this demo will not support incompatible browsers and/or devices. No guarantee of factually correct results. In some cases, the models may return hallucinated or inappropriate responses.</p>
        </div>
      </div>
    </div>
  )
}