Spaces:
Paused
Paused
Commit
•
7249a2e
1
Parent(s):
6caeb80
add support for whisper-turbo (base)
Browse files- package-lock.json +0 -0
- package.json +4 -1
- src/app/engine/listen.ts +46 -0
- src/app/engine/think.ts +27 -19
- src/app/listen.tsx +273 -28
- src/app/main.tsx +19 -29
- src/app/observe.tsx +4 -4
- src/app/speak.tsx +5 -2
- src/components/ui/dialog.tsx +1 -2
- src/components/ui/toast.tsx +127 -0
- src/components/ui/toaster.tsx +35 -0
- src/components/ui/use-toast.ts +192 -0
- src/lib/blobToBase64Uri.ts +18 -0
- src/types.ts +11 -0
package-lock.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
package.json
CHANGED
@@ -24,6 +24,7 @@
|
|
24 |
"@radix-ui/react-separator": "^1.0.3",
|
25 |
"@radix-ui/react-slot": "^1.0.2",
|
26 |
"@radix-ui/react-switch": "^1.0.3",
|
|
|
27 |
"@radix-ui/react-tooltip": "^1.0.6",
|
28 |
"@react-pdf/renderer": "^3.1.12",
|
29 |
"@types/node": "20.4.2",
|
@@ -59,7 +60,9 @@
|
|
59 |
"tts-react": "^3.0.1",
|
60 |
"typescript": "5.1.6",
|
61 |
"usehooks-ts": "^2.9.1",
|
62 |
-
"uuid": "^9.0.0"
|
|
|
|
|
63 |
},
|
64 |
"devDependencies": {
|
65 |
"@types/sbd": "^1.0.3"
|
|
|
24 |
"@radix-ui/react-separator": "^1.0.3",
|
25 |
"@radix-ui/react-slot": "^1.0.2",
|
26 |
"@radix-ui/react-switch": "^1.0.3",
|
27 |
+
"@radix-ui/react-toast": "^1.1.4",
|
28 |
"@radix-ui/react-tooltip": "^1.0.6",
|
29 |
"@react-pdf/renderer": "^3.1.12",
|
30 |
"@types/node": "20.4.2",
|
|
|
60 |
"tts-react": "^3.0.1",
|
61 |
"typescript": "5.1.6",
|
62 |
"usehooks-ts": "^2.9.1",
|
63 |
+
"uuid": "^9.0.0",
|
64 |
+
"webm-to-wav-converter": "^1.1.0",
|
65 |
+
"whisper-turbo": "^0.7.0"
|
66 |
},
|
67 |
"devDependencies": {
|
68 |
"@types/sbd": "^1.0.3"
|
src/app/engine/listen.ts
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use server"
|
2 |
+
|
3 |
+
import { SoundAnalysisRequest, SoundAnalysisResponse } from "@/types"
|
4 |
+
|
5 |
+
const apiUrl = `${process.env.RENDERING_ENGINE_API || ""}`
|
6 |
+
|
7 |
+
export async function listen(sound: string): Promise<string> {
|
8 |
+
if (!sound?.length) {
|
9 |
+
console.log(`cannot call the API without a sound, aborting..`)
|
10 |
+
// throw new Error(`cannot call the API without a sound, aborting..`)
|
11 |
+
return ""
|
12 |
+
}
|
13 |
+
|
14 |
+
try {
|
15 |
+
const request = {
|
16 |
+
sound
|
17 |
+
} as SoundAnalysisRequest
|
18 |
+
|
19 |
+
console.log(`calling ${apiUrl}/listen called with: `, {
|
20 |
+
sound: request.sound.slice(0, 20)
|
21 |
+
})
|
22 |
+
|
23 |
+
const res = await fetch(`${apiUrl}/listen`, {
|
24 |
+
method: "POST",
|
25 |
+
headers: {
|
26 |
+
Accept: "application/json",
|
27 |
+
"Content-Type": "application/json",
|
28 |
+
// Authorization: `Bearer ${process.env.VC_SECRET_ACCESS_TOKEN}`,
|
29 |
+
},
|
30 |
+
body: JSON.stringify(request),
|
31 |
+
cache: 'no-store',
|
32 |
+
// we can also use this (see https://vercel.com/blog/vercel-cache-api-nextjs-cache)
|
33 |
+
// next: { revalidate: 1 }
|
34 |
+
})
|
35 |
+
|
36 |
+
if (res.status !== 200) {
|
37 |
+
throw new Error('Failed to fetch data')
|
38 |
+
}
|
39 |
+
|
40 |
+
const response = (await res.json()) as SoundAnalysisResponse
|
41 |
+
return response.result
|
42 |
+
} catch (err) {
|
43 |
+
console.error(err)
|
44 |
+
return ""
|
45 |
+
}
|
46 |
+
}
|
src/app/engine/think.ts
CHANGED
@@ -5,35 +5,38 @@ import { createLlamaPrompt } from "@/lib/createLlamaPrompt"
|
|
5 |
|
6 |
import { predict } from "./predict"
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
observation: string;
|
15 |
-
history: string;
|
16 |
-
}): Promise<string> => {
|
17 |
if (!event) {
|
18 |
throw new Error("missing event")
|
19 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
const prompt = createLlamaPrompt([
|
21 |
{
|
22 |
role: "system",
|
23 |
content: [
|
24 |
-
`You are a companion robot, very friendly, curious about the world.`,
|
25 |
-
|
26 |
-
// TODO: put the history here (from most recent to oldest)
|
27 |
-
`You have been presented some situation in the past, but you lost your memory.`,
|
28 |
-
|
29 |
`Today's date is ${format(new Date(), 'yyyy-MM-dd at HH:mm (d)')}.`,
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
].filter(item => item).join("\n")
|
32 |
},
|
33 |
-
|
34 |
-
role: "user",
|
35 |
-
content: event,
|
36 |
-
}
|
37 |
])
|
38 |
|
39 |
|
@@ -56,5 +59,10 @@ export const think = async ({
|
|
56 |
// llama-2 is too chatty, let's keep 3 sentences at most
|
57 |
const sentences = sbd.sentences(result).slice(0, 3).join(" ").trim()
|
58 |
|
|
|
|
|
|
|
|
|
|
|
59 |
return sentences
|
60 |
}
|
|
|
5 |
|
6 |
import { predict } from "./predict"
|
7 |
|
8 |
+
const internalHistory: {
|
9 |
+
role: string;
|
10 |
+
content: string;
|
11 |
+
}[] = []
|
12 |
+
|
13 |
+
export const think = async (event: string): Promise<string> => {
|
|
|
|
|
|
|
14 |
if (!event) {
|
15 |
throw new Error("missing event")
|
16 |
}
|
17 |
+
|
18 |
+
internalHistory.push({
|
19 |
+
role: "user",
|
20 |
+
content: event,
|
21 |
+
})
|
22 |
+
|
23 |
+
if (internalHistory.length > 10) {
|
24 |
+
internalHistory.shift()
|
25 |
+
}
|
26 |
+
|
27 |
const prompt = createLlamaPrompt([
|
28 |
{
|
29 |
role: "system",
|
30 |
content: [
|
|
|
|
|
|
|
|
|
|
|
31 |
`Today's date is ${format(new Date(), 'yyyy-MM-dd at HH:mm (d)')}.`,
|
32 |
+
`You are an android robot, very friendly, curious about the world.`,
|
33 |
+
`Your life goal is to help human and interact them as a natural way.`,
|
34 |
+
`You are going to see and hear various things, and you need to act in a very natural way.`,
|
35 |
+
`If you see someone through your eyes, you need to interact with them,`,
|
36 |
+
`you should be o ngoing and open, ask questions, be curious, do jokes etc.`,
|
37 |
].filter(item => item).join("\n")
|
38 |
},
|
39 |
+
...internalHistory,
|
|
|
|
|
|
|
40 |
])
|
41 |
|
42 |
|
|
|
59 |
// llama-2 is too chatty, let's keep 3 sentences at most
|
60 |
const sentences = sbd.sentences(result).slice(0, 3).join(" ").trim()
|
61 |
|
62 |
+
internalHistory.push({
|
63 |
+
role: "assistant",
|
64 |
+
content: sentences,
|
65 |
+
})
|
66 |
+
|
67 |
return sentences
|
68 |
}
|
src/app/listen.tsx
CHANGED
@@ -2,52 +2,297 @@
|
|
2 |
|
3 |
import { useCallback, useEffect, useRef, useState, useTransition } from "react"
|
4 |
import { useInterval } from "usehooks-ts"
|
|
|
|
|
5 |
import { useRecorder } from "react-microphone-recorder"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
// import { listen } from "./engine/listen"
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
export function Listen({
|
10 |
onListen,
|
11 |
}: {
|
12 |
onListen: (recording: string) => void
|
13 |
}) {
|
14 |
-
const
|
15 |
-
|
16 |
-
const
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
useInterval(() => {
|
31 |
-
console.log("let's stop, and start again")
|
32 |
-
stopRecording()
|
33 |
-
startRecording()
|
34 |
}, 3000)
|
35 |
|
36 |
useEffect(() => {
|
37 |
-
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
-
|
41 |
-
// await listen()
|
42 |
-
})
|
43 |
}, [])
|
44 |
|
45 |
-
|
46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
return (
|
48 |
-
<div className="fixed top-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
</div>
|
51 |
)
|
52 |
-
*/
|
53 |
}
|
|
|
2 |
|
3 |
import { useCallback, useEffect, useRef, useState, useTransition } from "react"
|
4 |
import { useInterval } from "usehooks-ts"
|
5 |
+
|
6 |
+
// TODO: try this? https://www.npmjs.com/package/react-audio-voice-recorder
|
7 |
import { useRecorder } from "react-microphone-recorder"
|
8 |
+
import { getWaveBlob } from "webm-to-wav-converter"
|
9 |
+
import {
|
10 |
+
AvailableModels,
|
11 |
+
InferenceSession,
|
12 |
+
MicRecorder,
|
13 |
+
SessionManager,
|
14 |
+
} from "whisper-turbo"
|
15 |
+
|
16 |
+
import { useToast } from "@/components/ui/use-toast"
|
17 |
+
// import { listen } from "@/app/engine/listen"
|
18 |
+
import { blobToBase64Uri } from "@/lib/blobToBase64Uri"
|
19 |
|
20 |
// import { listen } from "./engine/listen"
|
21 |
|
22 |
+
export interface TSSegment {
|
23 |
+
text: string;
|
24 |
+
start: number;
|
25 |
+
stop: number;
|
26 |
+
last: boolean;
|
27 |
+
}
|
28 |
+
|
29 |
+
export interface TSTranscript {
|
30 |
+
segments: Array<TSSegment>;
|
31 |
+
}
|
32 |
+
|
33 |
export function Listen({
|
34 |
onListen,
|
35 |
}: {
|
36 |
onListen: (recording: string) => void
|
37 |
}) {
|
38 |
+
const { toast } = useToast()
|
39 |
+
|
40 |
+
const [transcribing, setTranscribing] = useState(false)
|
41 |
+
const transcribingRef = useRef(transcribing)
|
42 |
+
useEffect(() => { transcribingRef.current = transcribing }, [transcribing])
|
43 |
+
|
44 |
+
// used to detect changes, signal when we can analyze the audio
|
45 |
+
const [audioDataFrame, setAudioDataFrame] = useState(0)
|
46 |
+
const audioDataFrameRef = useRef(audioDataFrame)
|
47 |
+
useEffect(() => { audioDataFrameRef.current = audioDataFrame }, [audioDataFrame])
|
48 |
+
|
49 |
+
const [transcriptBuffer, setTranscriptBuffer] = useState("")
|
50 |
+
useEffect(() => {
|
51 |
+
onListen(transcriptBuffer)
|
52 |
+
}, [transcriptBuffer])
|
53 |
+
/*
|
54 |
+
Available models: {
|
55 |
+
WHISPER_TINY: 'whisper-tiny',
|
56 |
+
WHISPER_BASE: 'whisper-base',
|
57 |
+
WHISPER_SMALL: 'whisper-small',
|
58 |
+
WHISPER_MEDIUM: 'whisper-medium',
|
59 |
+
WHISPER_LARGE: 'whisper-large'
|
60 |
+
}
|
61 |
+
*/
|
62 |
+
const whisperModel: AvailableModels = AvailableModels.WHISPER_BASE
|
63 |
+
|
64 |
+
const listenerRef = useRef({
|
65 |
+
isListening: false,
|
66 |
+
startedListeningAt: 0,
|
67 |
+
stoppedListeningAt: 0,
|
68 |
+
durationInMs: 0,
|
69 |
+
hits: 0,
|
70 |
+
debugCanContinue: true, // used for debugging
|
71 |
+
})
|
72 |
+
|
73 |
+
// the background listener is not a CIA spy device, but a detector of changes in the
|
74 |
+
// background noise volume level. The goal is to detect whenever an interesting event is happening
|
75 |
+
const backgroundListener = useRecorder()
|
76 |
+
|
77 |
+
// the foreground listener is the actual sound sampler
|
78 |
+
// with our system, it will always lag a bit behind the background listener
|
79 |
+
// however there might be a fix (which I haven't tried yet):
|
80 |
+
// to take the last second of the background listener sample,
|
81 |
+
// and glue it to the beginning of the foreground listener sample
|
82 |
+
//
|
83 |
+
// or, alternatively, we could just try to use a shorter time window for the background listener,
|
84 |
+
// to make it more reactive
|
85 |
+
const foregroundListener = useRecorder()
|
86 |
+
|
87 |
+
// to detect voice, we use a combination of audio level and frequency sampling
|
88 |
+
const heardSomething = backgroundListener.audioLevel > 12 // 18
|
89 |
+
|
90 |
+
const status = heardSomething ? "I hear something!" : "background noise"
|
91 |
+
|
92 |
+
const session = useRef<InferenceSession | null>(null)
|
93 |
+
const [audioData, setAudioData] = useState<Uint8Array | null>(null)
|
94 |
+
const [audioMetadata, setAudioMetadata] = useState<File | null>(null)
|
95 |
+
const [loaded, setLoaded] = useState<boolean>(false)
|
96 |
+
const [progress, setProgress] = useState<number>(0)
|
97 |
+
|
98 |
+
const isLoadingModel = progress > 0
|
99 |
+
const hasLoadedModel = progress === 100
|
100 |
+
|
101 |
+
const loadModel = async () => {
|
102 |
+
console.log("loadModel")
|
103 |
+
if (session.current) {
|
104 |
+
session.current.destroy()
|
105 |
+
}
|
106 |
+
if (!whisperModel) {
|
107 |
+
console.error("No whisper model loaded")
|
108 |
+
return
|
109 |
+
}
|
110 |
+
|
111 |
+
try {
|
112 |
+
const manager = new SessionManager()
|
113 |
+
const loadResult = await manager.loadModel(
|
114 |
+
whisperModel,
|
115 |
+
() => {
|
116 |
+
setLoaded(true)
|
117 |
+
},
|
118 |
+
(p: number) => {
|
119 |
+
console.log("progress:", p)
|
120 |
+
setProgress(p)
|
121 |
+
}
|
122 |
+
)
|
123 |
+
if (loadResult.isErr) {
|
124 |
+
throw new Error(loadResult.error.message)
|
125 |
+
} else {
|
126 |
+
session.current = loadResult.value
|
127 |
+
}
|
128 |
+
} catch (err) {
|
129 |
+
const error = `failed to load the model: ${err}`
|
130 |
+
console.error(error)
|
131 |
+
toast({
|
132 |
+
title: "Error",
|
133 |
+
description: error,
|
134 |
+
variant: "destructive"
|
135 |
+
})
|
136 |
+
}
|
137 |
+
}
|
138 |
|
139 |
+
const runSession = async () => {
|
140 |
+
if (!loaded) {
|
141 |
+
console.log("runSession: aborting (model not loaded yet)")
|
142 |
+
return
|
143 |
+
}
|
144 |
+
if (!session.current) {
|
145 |
+
console.log("runSession: aborting (no model loaded)")
|
146 |
+
toast({
|
147 |
+
title: "Error",
|
148 |
+
description: "No model loaded",
|
149 |
+
variant: "destructive"
|
150 |
+
})
|
151 |
+
return
|
152 |
+
}
|
153 |
+
// console.log("debug:", { audioData, audioDataFrame })
|
154 |
+
if (!audioData) {
|
155 |
+
console.log("runSession: aborting (no audio file loaded)")
|
156 |
+
toast({
|
157 |
+
title: "Error",
|
158 |
+
description: "No audio file loaded",
|
159 |
+
variant: "destructive"
|
160 |
+
})
|
161 |
+
return
|
162 |
+
}
|
163 |
+
|
164 |
+
setTranscribing(transcribingRef.current = true)
|
165 |
+
|
166 |
+
try {
|
167 |
+
await session.current.transcribe(audioData, (s: any) => {
|
168 |
+
const segment = s as { text: string, start: number, stop: number, last: boolean }
|
169 |
+
const text = segment.text.trim()
|
170 |
+
console.log("text:", text)
|
171 |
+
if (text) {
|
172 |
+
setTranscriptBuffer(text)
|
173 |
+
}
|
174 |
+
|
175 |
+
if (s.last) {
|
176 |
+
console.log("IS LAST")
|
177 |
+
setTranscribing(transcribingRef.current = false)
|
178 |
+
return
|
179 |
+
}
|
180 |
+
})
|
181 |
+
} catch (err) {
|
182 |
+
const error = `transcription crashed: ${err}`
|
183 |
+
console.error(error)
|
184 |
+
toast({
|
185 |
+
title: "Error",
|
186 |
+
description: "No audio file loaded",
|
187 |
+
variant: "destructive"
|
188 |
+
})
|
189 |
+
}
|
190 |
+
}
|
191 |
+
|
192 |
+
// let's disable the background recorder for now
|
193 |
useInterval(() => {
|
194 |
+
// console.log("let's stop, and start again")
|
195 |
+
backgroundListener.stopRecording()
|
196 |
+
backgroundListener.startRecording()
|
197 |
}, 3000)
|
198 |
|
199 |
useEffect(() => {
|
200 |
+
const fn = async () => {
|
201 |
+
console.log("load model..")
|
202 |
+
await loadModel()
|
203 |
+
|
204 |
+
console.log("starting to listen to background noise to detect volume peaks..")
|
205 |
+
backgroundListener.startRecording()
|
206 |
+
}
|
207 |
|
208 |
+
fn()
|
|
|
|
|
209 |
}, [])
|
210 |
|
211 |
+
|
212 |
+
useEffect(() => {
|
213 |
+
if (!audioData) {
|
214 |
+
console.log("no audio")
|
215 |
+
}
|
216 |
+
// console.log("audioDataFrame changed, need to process audioData!")
|
217 |
+
runSession()
|
218 |
+
}, [audioDataFrame])
|
219 |
+
|
220 |
+
useEffect(() => {
|
221 |
+
if (heardSomething) {
|
222 |
+
if (!listenerRef.current.isListening) {
|
223 |
+
console.log("recoording..")
|
224 |
+
foregroundListener.startRecording()
|
225 |
+
listenerRef.current.hits = 0
|
226 |
+
listenerRef.current.isListening = true
|
227 |
+
|
228 |
+
// TODO: use a debouncer to detect when we started speaking
|
229 |
+
setTimeout(async () => {
|
230 |
+
foregroundListener.stopRecording()
|
231 |
+
listenerRef.current.isListening = false
|
232 |
+
listenerRef.current.stoppedListeningAt = Date.now()
|
233 |
+
listenerRef.current.durationInMs =
|
234 |
+
listenerRef.current.stoppedListeningAt - listenerRef.current.startedListeningAt
|
235 |
+
|
236 |
+
const hits = listenerRef.current.hits
|
237 |
+
|
238 |
+
console.log(`end of sample (${foregroundListener.timeElapsed}, ${hits} hits)`)
|
239 |
+
|
240 |
+
if (!foregroundListener.audioBlob || typeof window === "undefined" || !window?.FileReader) {
|
241 |
+
return
|
242 |
+
}
|
243 |
+
|
244 |
+
if (hits > 11) {
|
245 |
+
// at 12 threshold level, we should have between 12 and 20 hits (per 2 sec) for short words and utterances
|
246 |
+
// at 12 threshold level, keystrokes should not be detected, unless the person hits the keyboard heavily
|
247 |
+
|
248 |
+
console.log("got an interesting sample, sending for review")
|
249 |
+
|
250 |
+
// temporary, to prevent infinite loop
|
251 |
+
if (listenerRef.current.debugCanContinue) {
|
252 |
+
// to prevent the infinite loop, set this value to false
|
253 |
+
// listenerRef.current.debugCanContinue = false
|
254 |
+
|
255 |
+
try {
|
256 |
+
const blob = await getWaveBlob(foregroundListener.audioBlob, false) // false = 16 bit, true = 32 bit
|
257 |
+
const arrayBuffer = await blob.arrayBuffer()
|
258 |
+
const uint8Array = new Uint8Array(arrayBuffer)
|
259 |
+
|
260 |
+
setAudioData(uint8Array)
|
261 |
+
setAudioDataFrame(audioDataFrameRef.current + 1)
|
262 |
+
} catch (err) {
|
263 |
+
const error = `failed to convert the audio sample: ${err}`
|
264 |
+
console.error(error)
|
265 |
+
toast({
|
266 |
+
title: "Error",
|
267 |
+
description: error,
|
268 |
+
variant: "destructive"
|
269 |
+
})
|
270 |
+
}
|
271 |
+
} else {
|
272 |
+
console.log("Julian: infinite loop temporary disabled :D")
|
273 |
+
}
|
274 |
+
}
|
275 |
+
}, 3000)
|
276 |
+
} else {
|
277 |
+
// TODO: increase hits?
|
278 |
+
// listenerRef.current.hits = listenerRef.current.hits + 1
|
279 |
+
}
|
280 |
+
}
|
281 |
+
}, [heardSomething])
|
282 |
+
|
283 |
+
if (heardSomething && listenerRef.current.isListening) {
|
284 |
+
listenerRef.current.hits = listenerRef.current.hits + 1
|
285 |
+
}
|
286 |
+
|
287 |
return (
|
288 |
+
<div className="fixed top-80 left-16 z-10 bg-gray-100 p-4">
|
289 |
+
{isLoadingModel && hasLoadedModel
|
290 |
+
? <p>Loading: ${progress}%</p>
|
291 |
+
: <p>{
|
292 |
+
transcriptBuffer
|
293 |
+
|| ""
|
294 |
+
}</p>
|
295 |
+
}
|
296 |
</div>
|
297 |
)
|
|
|
298 |
}
|
src/app/main.tsx
CHANGED
@@ -10,6 +10,7 @@ import { think } from "./engine/think"
|
|
10 |
import { Progress } from "./interface/progress"
|
11 |
import { Listen } from "./listen"
|
12 |
import { Speak } from "./speak"
|
|
|
13 |
|
14 |
export default function Main() {
|
15 |
const [_isPending, startTransition] = useTransition()
|
@@ -17,38 +18,25 @@ export default function Main() {
|
|
17 |
const [lastRawObservation, setLastRawObservation] = useState<string>("")
|
18 |
const [isLoadingAction, setLoadingAction] = useState(false)
|
19 |
|
20 |
-
const [observations, setObservations] = useState<string[]>([])
|
21 |
const [action, setAction] = useState<string>("Nothing to say yet.")
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
setLastRawObservation(observation)
|
26 |
-
setLastImage(image)
|
27 |
-
|
28 |
-
// last comes first
|
29 |
-
setObservations([
|
30 |
-
`On ${format(new Date(), 'yyyy-MM-dd at HH:mm (d)')}, you saw: \"${observation}\".`
|
31 |
-
].concat(observations))
|
32 |
-
|
33 |
-
// TODO: use llama-2 to summarize previous observations
|
34 |
-
const history = observations.slice(0, 3).join("\n")
|
35 |
-
|
36 |
-
|
37 |
startTransition(async () => {
|
38 |
-
|
39 |
-
const action = await think({
|
40 |
-
history,
|
41 |
-
observation,
|
42 |
-
event: "Please react in a natural way to the current situation, by interacting with the person or entity you are seeing.",
|
43 |
-
})
|
44 |
-
|
45 |
setAction(action)
|
46 |
setLoadingAction(false)
|
47 |
})
|
48 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
const handleOnListen = (recording: string) => {
|
51 |
-
|
52 |
}
|
53 |
|
54 |
return (
|
@@ -93,8 +81,9 @@ export default function Main() {
|
|
93 |
</div>
|
94 |
|
95 |
<Observe onObserve={handleOnObserve} />
|
96 |
-
|
97 |
<Speak>{action}</Speak>
|
|
|
98 |
|
99 |
<Progress
|
100 |
isLoading={isLoadingAction}
|
@@ -104,11 +93,12 @@ export default function Main() {
|
|
104 |
|
105 |
<div className="fixed z-10 left-0 right-0 bottom-0 flex flex-col items-center justify-center">
|
106 |
<div className="full md:w-[80%] lg:w-[70%] mb-0 md:p-4 md:mb-8 bg-zinc-100 md:rounded-xl p-4 shadow-2xl text-xs md:text-sm">
|
107 |
-
<p>🅿️ <span className="font-semibold">
|
108 |
-
|
109 |
-
and
|
110 |
-
<a href="https://
|
111 |
-
|
|
|
112 |
</div>
|
113 |
</div>
|
114 |
</div>
|
|
|
10 |
import { Progress } from "./interface/progress"
|
11 |
import { Listen } from "./listen"
|
12 |
import { Speak } from "./speak"
|
13 |
+
import { Toaster } from "@/components/ui/toaster"
|
14 |
|
15 |
export default function Main() {
|
16 |
const [_isPending, startTransition] = useTransition()
|
|
|
18 |
const [lastRawObservation, setLastRawObservation] = useState<string>("")
|
19 |
const [isLoadingAction, setLoadingAction] = useState(false)
|
20 |
|
|
|
21 |
const [action, setAction] = useState<string>("Nothing to say yet.")
|
22 |
|
23 |
+
const handleOnEvent = (event: string) => {
|
24 |
+
setLoadingAction(true)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
startTransition(async () => {
|
26 |
+
const action = await think(event)
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
setAction(action)
|
28 |
setLoadingAction(false)
|
29 |
})
|
30 |
}
|
31 |
+
// receive a new observation from what the agent is looking at
|
32 |
+
const handleOnObserve = (observation: string, image: string) => {
|
33 |
+
setLastRawObservation(observation)
|
34 |
+
setLastImage(image)
|
35 |
+
handleOnEvent(`It is ${format(new Date(), 'HH:mm (d)')} and you are seeing this: ${observation}`)
|
36 |
+
}
|
37 |
|
38 |
const handleOnListen = (recording: string) => {
|
39 |
+
handleOnEvent(`It is ${format(new Date(), 'HH:mm (d)')} and you are hearing this: ${recording}`)
|
40 |
}
|
41 |
|
42 |
return (
|
|
|
81 |
</div>
|
82 |
|
83 |
<Observe onObserve={handleOnObserve} />
|
84 |
+
<Listen onListen={handleOnListen} />
|
85 |
<Speak>{action}</Speak>
|
86 |
+
<Toaster />
|
87 |
|
88 |
<Progress
|
89 |
isLoading={isLoadingAction}
|
|
|
93 |
|
94 |
<div className="fixed z-10 left-0 right-0 bottom-0 flex flex-col items-center justify-center">
|
95 |
<div className="full md:w-[80%] lg:w-[70%] mb-0 md:p-4 md:mb-8 bg-zinc-100 md:rounded-xl p-4 shadow-2xl text-xs md:text-sm">
|
96 |
+
<p>🅿️ <span className="font-semibold">
|
97 |
+
</span>This multimodal demo allow
|
98 |
+
<a href="https://huggingface.co/meta-llama" target="_blank" className="font-semibold underline"> Llama-2 </a> to hear, see and talk.
|
99 |
+
You need to upgrade to a <a href="https://caniuse.com/webgpu" target="_blank" className="font-semibold underline">browser with support for WebGPU</a> for speech recognition to work.
|
100 |
+
Vision is handled by <a href="https://huggingface.co/HuggingFaceM4/idefics-80b#bias-evaluation" target="_blank" className="font-semibold underline"> IDEFICS </a></p>
|
101 |
+
<p>⛔️ <span className="font-semibold">Limitations: </span>This demo is provided as-is, for demonstration and research purpose only. As it demonstrates WebGPU technology, this demo will not support incompatible browsers and/or devices. No guarantee of factually correct results. In some cases, the models may return hallucinated or innapropriate responses.</p>
|
102 |
</div>
|
103 |
</div>
|
104 |
</div>
|
src/app/observe.tsx
CHANGED
@@ -66,7 +66,7 @@ export function Observe({
|
|
66 |
|
67 |
setBusy(true)
|
68 |
|
69 |
-
console.log("Capturing new frame from webcam..")
|
70 |
|
71 |
startTransition(async () => {
|
72 |
const imageBase64 = capture()
|
@@ -80,10 +80,10 @@ export function Observe({
|
|
80 |
}
|
81 |
const prompt = `What do you see here?`
|
82 |
|
83 |
-
console.log("Calling IDEFICS..")
|
84 |
-
const newObservation = await see({ prompt, imageBase64 })
|
85 |
|
86 |
-
console.log("New observation: ", newObservation)
|
87 |
if (newObservation !== lastObservation) {
|
88 |
// console.log("update!")
|
89 |
setLastObservation(newObservation || "")
|
|
|
66 |
|
67 |
setBusy(true)
|
68 |
|
69 |
+
// console.log("Capturing new frame from webcam..")
|
70 |
|
71 |
startTransition(async () => {
|
72 |
const imageBase64 = capture()
|
|
|
80 |
}
|
81 |
const prompt = `What do you see here?`
|
82 |
|
83 |
+
// console.log("Calling IDEFICS..")
|
84 |
+
const newObservation = "fake" // await see({ prompt, imageBase64 })
|
85 |
|
86 |
+
// console.log("New observation: ", newObservation)
|
87 |
if (newObservation !== lastObservation) {
|
88 |
// console.log("update!")
|
89 |
setLastObservation(newObservation || "")
|
src/app/speak.tsx
CHANGED
@@ -46,11 +46,14 @@ export function Speak({
|
|
46 |
if (newMessage === playedMessage) { return }
|
47 |
const synth = window.speechSynthesis
|
48 |
|
49 |
-
console.log(`Speaking "${newMessage}"`)
|
50 |
setPlayedMessage(newMessage)
|
51 |
const utterance = new SpeechSynthesisUtterance(newMessage)
|
52 |
utterance.voice = voice
|
53 |
-
|
|
|
|
|
|
|
54 |
}, [voice?.name, newMessage, playedMessage])
|
55 |
|
56 |
return (
|
|
|
46 |
if (newMessage === playedMessage) { return }
|
47 |
const synth = window.speechSynthesis
|
48 |
|
49 |
+
// console.log(`Speaking "${newMessage}"`)
|
50 |
setPlayedMessage(newMessage)
|
51 |
const utterance = new SpeechSynthesisUtterance(newMessage)
|
52 |
utterance.voice = voice
|
53 |
+
|
54 |
+
console.log("julian: voice disabled :D")
|
55 |
+
// synth.speak(utterance)
|
56 |
+
|
57 |
}, [voice?.name, newMessage, playedMessage])
|
58 |
|
59 |
return (
|
src/components/ui/dialog.tsx
CHANGED
@@ -11,10 +11,9 @@ const Dialog = DialogPrimitive.Root
|
|
11 |
const DialogTrigger = DialogPrimitive.Trigger
|
12 |
|
13 |
const DialogPortal = ({
|
14 |
-
className,
|
15 |
...props
|
16 |
}: DialogPrimitive.DialogPortalProps) => (
|
17 |
-
<DialogPrimitive.Portal
|
18 |
)
|
19 |
DialogPortal.displayName = DialogPrimitive.Portal.displayName
|
20 |
|
|
|
11 |
const DialogTrigger = DialogPrimitive.Trigger
|
12 |
|
13 |
const DialogPortal = ({
|
|
|
14 |
...props
|
15 |
}: DialogPrimitive.DialogPortalProps) => (
|
16 |
+
<DialogPrimitive.Portal {...props} />
|
17 |
)
|
18 |
DialogPortal.displayName = DialogPrimitive.Portal.displayName
|
19 |
|
src/components/ui/toast.tsx
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import * as React from "react"
|
2 |
+
import * as ToastPrimitives from "@radix-ui/react-toast"
|
3 |
+
import { cva, type VariantProps } from "class-variance-authority"
|
4 |
+
import { X } from "lucide-react"
|
5 |
+
|
6 |
+
import { cn } from "@/lib/utils"
|
7 |
+
|
8 |
+
const ToastProvider = ToastPrimitives.Provider
|
9 |
+
|
10 |
+
const ToastViewport = React.forwardRef<
|
11 |
+
React.ElementRef<typeof ToastPrimitives.Viewport>,
|
12 |
+
React.ComponentPropsWithoutRef<typeof ToastPrimitives.Viewport>
|
13 |
+
>(({ className, ...props }, ref) => (
|
14 |
+
<ToastPrimitives.Viewport
|
15 |
+
ref={ref}
|
16 |
+
className={cn(
|
17 |
+
"fixed top-0 z-[100] flex max-h-screen w-full flex-col-reverse p-4 sm:bottom-0 sm:right-0 sm:top-auto sm:flex-col md:max-w-[420px]",
|
18 |
+
className
|
19 |
+
)}
|
20 |
+
{...props}
|
21 |
+
/>
|
22 |
+
))
|
23 |
+
ToastViewport.displayName = ToastPrimitives.Viewport.displayName
|
24 |
+
|
25 |
+
const toastVariants = cva(
|
26 |
+
"group pointer-events-auto relative flex w-full items-center justify-between space-x-4 overflow-hidden rounded-md border border-stone-200 p-6 pr-8 shadow-lg transition-all data-[swipe=cancel]:translate-x-0 data-[swipe=end]:translate-x-[var(--radix-toast-swipe-end-x)] data-[swipe=move]:translate-x-[var(--radix-toast-swipe-move-x)] data-[swipe=move]:transition-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[swipe=end]:animate-out data-[state=closed]:fade-out-80 data-[state=closed]:slide-out-to-right-full data-[state=open]:slide-in-from-top-full data-[state=open]:sm:slide-in-from-bottom-full dark:border-stone-800",
|
27 |
+
{
|
28 |
+
variants: {
|
29 |
+
variant: {
|
30 |
+
default: "border bg-white text-stone-950 dark:bg-stone-950 dark:text-stone-50",
|
31 |
+
destructive:
|
32 |
+
"destructive group border-red-500 bg-red-500 text-stone-50 dark:border-red-900 dark:bg-red-900 dark:text-stone-50",
|
33 |
+
},
|
34 |
+
},
|
35 |
+
defaultVariants: {
|
36 |
+
variant: "default",
|
37 |
+
},
|
38 |
+
}
|
39 |
+
)
|
40 |
+
|
41 |
+
const Toast = React.forwardRef<
|
42 |
+
React.ElementRef<typeof ToastPrimitives.Root>,
|
43 |
+
React.ComponentPropsWithoutRef<typeof ToastPrimitives.Root> &
|
44 |
+
VariantProps<typeof toastVariants>
|
45 |
+
>(({ className, variant, ...props }, ref) => {
|
46 |
+
return (
|
47 |
+
<ToastPrimitives.Root
|
48 |
+
ref={ref}
|
49 |
+
className={cn(toastVariants({ variant }), className)}
|
50 |
+
{...props}
|
51 |
+
/>
|
52 |
+
)
|
53 |
+
})
|
54 |
+
Toast.displayName = ToastPrimitives.Root.displayName
|
55 |
+
|
56 |
+
const ToastAction = React.forwardRef<
|
57 |
+
React.ElementRef<typeof ToastPrimitives.Action>,
|
58 |
+
React.ComponentPropsWithoutRef<typeof ToastPrimitives.Action>
|
59 |
+
>(({ className, ...props }, ref) => (
|
60 |
+
<ToastPrimitives.Action
|
61 |
+
ref={ref}
|
62 |
+
className={cn(
|
63 |
+
"inline-flex h-8 shrink-0 items-center justify-center rounded-md border border-stone-200 bg-transparent px-3 text-sm font-medium ring-offset-white transition-colors hover:bg-stone-100 focus:outline-none focus:ring-2 focus:ring-stone-950 focus:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 group-[.destructive]:border-stone-100/40 group-[.destructive]:hover:border-red-500/30 group-[.destructive]:hover:bg-red-500 group-[.destructive]:hover:text-stone-50 group-[.destructive]:focus:ring-red-500 dark:border-stone-800 dark:ring-offset-stone-950 dark:hover:bg-stone-800 dark:focus:ring-stone-300 dark:group-[.destructive]:border-stone-800/40 dark:group-[.destructive]:hover:border-red-900/30 dark:group-[.destructive]:hover:bg-red-900 dark:group-[.destructive]:hover:text-stone-50 dark:group-[.destructive]:focus:ring-red-900",
|
64 |
+
className
|
65 |
+
)}
|
66 |
+
{...props}
|
67 |
+
/>
|
68 |
+
))
|
69 |
+
ToastAction.displayName = ToastPrimitives.Action.displayName
|
70 |
+
|
71 |
+
const ToastClose = React.forwardRef<
|
72 |
+
React.ElementRef<typeof ToastPrimitives.Close>,
|
73 |
+
React.ComponentPropsWithoutRef<typeof ToastPrimitives.Close>
|
74 |
+
>(({ className, ...props }, ref) => (
|
75 |
+
<ToastPrimitives.Close
|
76 |
+
ref={ref}
|
77 |
+
className={cn(
|
78 |
+
"absolute right-2 top-2 rounded-md p-1 text-stone-950/50 opacity-0 transition-opacity hover:text-stone-950 focus:opacity-100 focus:outline-none focus:ring-2 group-hover:opacity-100 group-[.destructive]:text-red-300 group-[.destructive]:hover:text-red-50 group-[.destructive]:focus:ring-red-400 group-[.destructive]:focus:ring-offset-red-600 dark:text-stone-50/50 dark:hover:text-stone-50",
|
79 |
+
className
|
80 |
+
)}
|
81 |
+
toast-close=""
|
82 |
+
{...props}
|
83 |
+
>
|
84 |
+
<X className="h-4 w-4" />
|
85 |
+
</ToastPrimitives.Close>
|
86 |
+
))
|
87 |
+
ToastClose.displayName = ToastPrimitives.Close.displayName
|
88 |
+
|
89 |
+
/**
 * Title line of a toast.
 *
 * Wraps `ToastPrimitives.Title`, merging the default text classes with any
 * caller-supplied `className` and forwarding the ref to the primitive.
 */
const ToastTitle = React.forwardRef<
  React.ElementRef<typeof ToastPrimitives.Title>,
  React.ComponentPropsWithoutRef<typeof ToastPrimitives.Title>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const mergedClassName = cn("text-sm font-semibold", className)

  return (
    <ToastPrimitives.Title
      ref={forwardedRef}
      className={mergedClassName}
      {...rest}
    />
  )
})
ToastTitle.displayName = ToastPrimitives.Title.displayName
|
100 |
+
|
101 |
+
/**
 * Body text of a toast.
 *
 * Wraps `ToastPrimitives.Description`, merging the default text classes with
 * any caller-supplied `className` and forwarding the ref to the primitive.
 */
const ToastDescription = React.forwardRef<
  React.ElementRef<typeof ToastPrimitives.Description>,
  React.ComponentPropsWithoutRef<typeof ToastPrimitives.Description>
>((allProps, forwardedRef) => {
  const { className, ...rest } = allProps
  const mergedClassName = cn("text-sm opacity-90", className)

  return (
    <ToastPrimitives.Description
      ref={forwardedRef}
      className={mergedClassName}
      {...rest}
    />
  )
})
ToastDescription.displayName = ToastPrimitives.Description.displayName
|
112 |
+
|
113 |
+
/** Props accepted by the `Toast` component (ref excluded). */
export type ToastProps = React.ComponentPropsWithoutRef<typeof Toast>

/** Element type used for a toast's optional action slot. */
export type ToastActionElement = React.ReactElement<typeof ToastAction>

// Public surface of the toast UI module.
export {
  ToastProvider,
  ToastViewport,
  Toast,
  ToastTitle,
  ToastDescription,
  ToastClose,
  ToastAction,
}
|
src/components/ui/toaster.tsx
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"use client"
|
2 |
+
|
3 |
+
import {
|
4 |
+
Toast,
|
5 |
+
ToastClose,
|
6 |
+
ToastDescription,
|
7 |
+
ToastProvider,
|
8 |
+
ToastTitle,
|
9 |
+
ToastViewport,
|
10 |
+
} from "@/components/ui/toast"
|
11 |
+
import { useToast } from "@/components/ui/use-toast"
|
12 |
+
|
13 |
+
/**
 * Renders every toast currently held by the `useToast` store.
 *
 * Each toast shows its optional title and description, then its optional
 * action element, then a close button. All toasts share one provider and
 * one viewport.
 */
export function Toaster() {
  const { toasts } = useToast()

  const renderedToasts = toasts.map((entry) => {
    // Content fields are rendered as children; everything else goes to <Toast>.
    const { id, title, description, action, ...toastProps } = entry

    return (
      <Toast key={id} {...toastProps}>
        <div className="grid gap-1">
          {title && <ToastTitle>{title}</ToastTitle>}
          {description && <ToastDescription>{description}</ToastDescription>}
        </div>
        {action}
        <ToastClose />
      </Toast>
    )
  })

  return (
    <ToastProvider>
      {renderedToasts}
      <ToastViewport />
    </ToastProvider>
  )
}
|
src/components/ui/use-toast.ts
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
// Inspired by react-hot-toast library
|
2 |
+
import * as React from "react"
|
3 |
+
|
4 |
+
import type {
|
5 |
+
ToastActionElement,
|
6 |
+
ToastProps,
|
7 |
+
} from "@/components/ui/toast"
|
8 |
+
|
9 |
+
/** Maximum number of toasts kept in the store at once. */
const TOAST_LIMIT = 1

/** Delay passed to `setTimeout` before a dismissed toast is removed from the store. */
const TOAST_REMOVE_DELAY = 1_000_000

/** A toast as held in the store: the component props plus an id and content slots. */
type ToasterToast = ToastProps & {
  id: string
  title?: React.ReactNode
  description?: React.ReactNode
  action?: ToastActionElement
}

/** Action names understood by the reducer; `as const` keeps them as literal types. */
const actionTypes = {
  ADD_TOAST: "ADD_TOAST",
  UPDATE_TOAST: "UPDATE_TOAST",
  DISMISS_TOAST: "DISMISS_TOAST",
  REMOVE_TOAST: "REMOVE_TOAST",
} as const
|
25 |
+
|
26 |
+
/** Last id handed out by `genId`; module-level so ids are unique across callers. */
let count = 0

/**
 * Returns the next toast id as a decimal string.
 *
 * The counter wraps at `Number.MAX_SAFE_INTEGER`. Wrapping at
 * `Number.MAX_VALUE` never takes effect in practice: once the counter passes
 * 2^53, `count + 1` rounds back to `count` and every later id is identical.
 * Staying inside the safe-integer range keeps each increment exact.
 *
 * @returns The new id, e.g. `"1"` for the first call.
 */
function genId() {
  count = (count + 1) % Number.MAX_SAFE_INTEGER
  return count.toString()
}
|
32 |
+
|
33 |
+
/** Type of the `actionTypes` object, used to look up each action's literal name. */
type ActionType = typeof actionTypes

/**
 * Every action the reducer accepts, discriminated by `type`.
 * For DISMISS_TOAST and REMOVE_TOAST an omitted `toastId` targets all toasts.
 */
type Action =
  | { type: ActionType["ADD_TOAST"]; toast: ToasterToast }
  | { type: ActionType["UPDATE_TOAST"]; toast: Partial<ToasterToast> }
  | { type: ActionType["DISMISS_TOAST"]; toastId?: ToasterToast["id"] }
  | { type: ActionType["REMOVE_TOAST"]; toastId?: ToasterToast["id"] }

/** Shape of the toast store. */
interface State {
  toasts: ToasterToast[]
}
|
56 |
+
|
57 |
+
/** Pending removal timers, keyed by toast id. */
const toastTimeouts = new Map<string, ReturnType<typeof setTimeout>>()

/**
 * Schedules a REMOVE_TOAST dispatch for `toastId` after TOAST_REMOVE_DELAY.
 * Does nothing when a timer for that id is already pending.
 */
const addToRemoveQueue = (toastId: string) => {
  const alreadyQueued = toastTimeouts.has(toastId)

  if (!alreadyQueued) {
    const handle = setTimeout(() => {
      // Forget the timer first so the id can be queued again later.
      toastTimeouts.delete(toastId)
      dispatch({ type: "REMOVE_TOAST", toastId })
    }, TOAST_REMOVE_DELAY)

    toastTimeouts.set(toastId, handle)
  }
}
|
74 |
+
|
75 |
+
/**
 * Computes the next toast store state for an action.
 *
 * - ADD_TOAST: prepends the toast and truncates the list to TOAST_LIMIT.
 * - UPDATE_TOAST: shallow-merges the patch into the toast with the same id.
 * - DISMISS_TOAST: marks the target toast (or all, when no id is given) as
 *   closed and queues it for removal.
 * - REMOVE_TOAST: drops the target toast, or every toast when no id is given.
 *
 * Note: DISMISS_TOAST has a side effect (it starts removal timers through
 * `addToRemoveQueue`), so this function is not pure.
 */
export const reducer = (state: State, action: Action): State => {
  switch (action.type) {
    case "ADD_TOAST": {
      const withNewest = [action.toast, ...state.toasts]
      return { ...state, toasts: withNewest.slice(0, TOAST_LIMIT) }
    }

    case "UPDATE_TOAST": {
      const patch = action.toast
      const toasts = state.toasts.map((existing) =>
        existing.id === patch.id ? { ...existing, ...patch } : existing
      )
      return { ...state, toasts }
    }

    case "DISMISS_TOAST": {
      const { toastId } = action

      // Side effect: start the removal timer for each affected toast.
      const idsToQueue = toastId
        ? [toastId]
        : state.toasts.map((existing) => existing.id)
      for (const id of idsToQueue) {
        addToRemoveQueue(id)
      }

      const closesEverything = toastId === undefined
      const toasts = state.toasts.map((existing) =>
        closesEverything || existing.id === toastId
          ? { ...existing, open: false }
          : existing
      )
      return { ...state, toasts }
    }

    case "REMOVE_TOAST": {
      const toasts =
        action.toastId === undefined
          ? []
          : state.toasts.filter((existing) => existing.id !== action.toastId)
      return { ...state, toasts }
    }
  }
}
|
129 |
+
|
130 |
+
/** Subscribers notified after every dispatch. */
const listeners: Array<(state: State) => void> = []

/** Current store state, shared by every subscriber. */
let memoryState: State = { toasts: [] }

/**
 * Runs `action` through the reducer, stores the result and notifies every
 * listener with the current state.
 */
function dispatch(action: Action) {
  memoryState = reducer(memoryState, action)
  // Read `memoryState` per listener so each one always gets the latest value.
  listeners.forEach((notify) => notify(memoryState))
}
|
140 |
+
|
141 |
+
/** Options accepted by `toast()`: a store toast without its generated id. */
type Toast = Omit<ToasterToast, "id">

/**
 * Adds a toast to the store and opens it.
 *
 * The toast is given a fresh id and an `onOpenChange` handler that dismisses
 * it when it closes.
 *
 * @returns The toast's `id` plus `dismiss` and `update` helpers bound to it.
 */
function toast(options: Toast) {
  const id = genId()

  function dismiss() {
    dispatch({ type: "DISMISS_TOAST", toastId: id })
  }

  function update(props: ToasterToast) {
    // The generated id always wins over any id carried by `props`.
    dispatch({ type: "UPDATE_TOAST", toast: { ...props, id } })
  }

  const entry: ToasterToast = {
    ...options,
    id,
    open: true,
    onOpenChange: (open) => {
      if (!open) dismiss()
    },
  }
  dispatch({ type: "ADD_TOAST", toast: entry })

  return { id, dismiss, update }
}
|
171 |
+
|
172 |
+
/**
 * React hook exposing the shared toast store.
 *
 * Subscribes the component to store updates for as long as it is mounted.
 *
 * @returns The current store state (`toasts`), the `toast` function for
 *   creating toasts, and `dismiss`, which dismisses one toast by id or every
 *   toast when called without an id.
 */
function useToast() {
  const [state, setState] = React.useState<State>(memoryState)

  React.useEffect(() => {
    listeners.push(setState)
    return () => {
      const index = listeners.indexOf(setState)
      if (index > -1) {
        listeners.splice(index, 1)
      }
    }
    // `setState` is stable, so subscribe once on mount and unsubscribe on
    // unmount. Depending on `state` removed and re-added the listener after
    // every store update.
  }, [])

  return {
    ...state,
    toast,
    dismiss: (toastId?: string) => dispatch({ type: "DISMISS_TOAST", toastId }),
  }
}

export { useToast, toast }
|
src/lib/blobToBase64Uri.ts
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
 * Reads a Blob as a data URL using the browser's `FileReader`.
 *
 * This function never rejects. It resolves to an empty string when no blob
 * is given, when `window` or `window.FileReader` is unavailable, or when the
 * read fails.
 *
 * @param blob - The blob to convert; may be omitted.
 * @returns A promise for the reader's result as a string, or `""`.
 */
export function blobToBase64Uri(blob?: Blob): Promise<string> {
  if (!blob || typeof window === "undefined" || !window.FileReader) {
    return Promise.resolve("")
  }

  const source = blob

  return new Promise<string>((resolve) => {
    const fileReader = new window.FileReader()

    fileReader.onloadend = () => resolve(`${fileReader.result || ""}`)
    // Failures are reported as an empty string rather than a rejection.
    fileReader.onerror = () => resolve("")

    fileReader.readAsDataURL(source)
  })
}
|
src/types.ts
CHANGED
@@ -7,3 +7,14 @@ export interface ImageAnalysisResponse {
|
|
7 |
result: string
|
8 |
error?: string
|
9 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
result: string
|
8 |
error?: string
|
9 |
}
|
10 |
+
|
11 |
+
|
12 |
+
/** Input for a sound analysis call. */
export interface SoundAnalysisRequest {
  /** The sound to analyse, in base64. */
  sound: string
  /** Prompt sent along with the sound. */
  prompt: string
}

/** Outcome of a sound analysis call. */
export interface SoundAnalysisResponse {
  /** The analysis result. */
  result: string
  /** Error message, when one is reported. */
  error?: string
}
|