jbilcke-hf (HF staff) committed
Commit d0d7bbe
1 Parent(s): 48cf37b

embed Broadway into Clapper

Files changed (43)
  1. .gitattributes +1 -0
  2. package-lock.json +0 -0
  3. package.json +30 -29
  4. public/datasets/baby-names-us-year-of-birth-full.csv +3 -0
  5. src/app/api/assistant/askAnyAssistant.ts +7 -7
  6. src/app/api/resolve/providers/falai/index.ts +10 -10
  7. src/app/api/resolve/providers/huggingface/generateImage.ts +3 -3
  8. src/app/api/resolve/providers/huggingface/generateVideo.ts +5 -7
  9. src/app/api/resolve/providers/huggingface/generateVoice.ts +3 -3
  10. src/app/api/resolve/providers/huggingface/index.ts +1 -1
  11. src/app/api/resolve/providers/index.ts +7 -0
  12. src/app/api/resolve/providers/replicate/index.ts +5 -5
  13. src/app/api/resolve/providers/stabilityai/generateImage.ts +2 -2
  14. src/app/api/resolve/providers/stabilityai/generateVideo.ts +2 -2
  15. src/app/api/resolve/route.ts +10 -8
  16. src/app/main.tsx +18 -1
  17. src/components/{core/ScriptEditor/index.FOR_LATER → editor/ScriptEditor/index.tsx} +74 -76
  18. src/components/monitor/DynamicPlayer/StoryboardBuffer.tsx +6 -1
  19. src/components/monitor/DynamicPlayer/index.tsx +3 -1
  20. src/components/settings/constants.ts +1 -1
  21. src/components/settings/image.tsx +7 -1
  22. src/components/settings/music.tsx +5 -1
  23. src/components/settings/sound.tsx +5 -2
  24. src/components/settings/video.tsx +5 -3
  25. src/components/settings/voice.tsx +5 -2
  26. src/controllers/audio/startAudioSourceNode.ts +17 -8
  27. src/controllers/audio/useAudio.ts +5 -4
  28. src/controllers/editor/getDefaultEditorState.ts +18 -0
  29. src/controllers/editor/types.ts +41 -0
  30. src/controllers/editor/useEditor.ts +110 -0
  31. src/controllers/io/useIO.ts +20 -14
  32. src/controllers/resolver/useResolver.ts +12 -1
  33. src/controllers/settings/getDefaultSettingsState.ts +16 -11
  34. src/controllers/settings/types.ts +21 -15
  35. src/controllers/settings/useSettings.ts +36 -26
  36. src/lib/core/constants.ts +1 -1
  37. src/lib/hf/adapter/adaptAnyInputsToGradioInputs.ts +30 -4
  38. src/lib/hf/adapter/identifyField.ts +10 -2
  39. src/lib/hf/callGradioApi.ts +10 -6
  40. src/lib/hf/getSpaces.ts +1 -0
  41. src/lib/utils/base64DataUriToBlob.ts +15 -0
  42. tailwind.config.js +4 -0
  43. tsconfig.json +0 -2
.gitattributes CHANGED
@@ -2,3 +2,4 @@
 *.xcf filter=lfs diff=lfs merge=lfs -text
 *.jpeg filter=lfs diff=lfs merge=lfs -text
 *.jpg filter=lfs diff=lfs merge=lfs -text
+public/datasets/baby-names-us-year-of-birth-full.csv filter=lfs diff=lfs merge=lfs -text
package-lock.json CHANGED
The diff for this file is too large to render. See raw diff
 
package.json CHANGED
@@ -10,20 +10,21 @@
     "lint": "next lint"
   },
   "dependencies": {
-    "@aitube/clap": "0.0.27",
-    "@aitube/engine": "0.0.24",
-    "@aitube/timeline": "0.0.29",
-    "@fal-ai/serverless-client": "^0.10.3",
+    "@aitube/broadway": "0.0.19",
+    "@aitube/clap": "0.0.29",
+    "@aitube/engine": "0.0.25",
+    "@aitube/timeline": "0.0.32",
+    "@fal-ai/serverless-client": "^0.11.0",
     "@gradio/client": "^1.1.1",
     "@huggingface/hub": "^0.15.1",
     "@huggingface/inference": "^2.7.0",
-    "@langchain/anthropic": "^0.2.0",
-    "@langchain/cohere": "^0.0.11",
-    "@langchain/core": "^0.2.6",
-    "@langchain/google-vertexai": "^0.0.18",
-    "@langchain/groq": "^0.0.12",
+    "@langchain/anthropic": "^0.2.2",
+    "@langchain/cohere": "^0.1.0",
+    "@langchain/core": "^0.2.9",
+    "@langchain/google-vertexai": "^0.0.19",
+    "@langchain/groq": "^0.0.13",
     "@langchain/mistralai": "^0.0.24",
-    "@langchain/openai": "^0.1.1",
+    "@langchain/openai": "^0.2.0",
     "@monaco-editor/react": "^4.6.0",
     "@radix-ui/react-accordion": "^1.1.2",
     "@radix-ui/react-avatar": "^1.0.4",
@@ -52,26 +53,19 @@
     "@react-three/uikit": "^0.3.4",
     "@react-three/uikit-lucide": "^0.3.4",
     "@tailwindcss/container-queries": "^0.1.1",
-    "@types/node": "20.12.7",
-    "@types/react": "18.3.0",
-    "@types/react-dom": "18.3.0",
-    "@types/uuid": "^9.0.8",
     "@upstash/ratelimit": "^1.1.3",
     "@upstash/redis": "^1.31.1",
-    "autoprefixer": "10.4.17",
+    "autoprefixer": "10.4.19",
     "class-variance-authority": "^0.7.0",
-    "clsx": "^2.1.0",
+    "clsx": "^2.1.1",
     "cmdk": "^0.2.1",
-    "eslint": "8.57.0",
-    "eslint-config-next": "14.1.0",
     "fluent-ffmpeg": "^2.1.3",
     "fs-extra": "^11.2.0",
-    "lucide-react": "^0.334.0",
+    "lucide-react": "^0.396.0",
     "mlt-xml": "^2.0.2",
-    "monaco-editor": "^0.49.0",
-    "next": "^14.2.3",
-    "next-themes": "^0.2.1",
-    "postcss": "8.4.38",
+    "monaco-editor": "^0.50.0",
+    "next": "^14.2.4",
+    "next-themes": "^0.3.0",
     "qs": "^6.12.1",
     "query-string": "^9.0.0",
     "react": "^18.3.1",
@@ -87,22 +81,29 @@
     "react-speakup": "^1.0.0",
     "replicate": "^0.30.2",
     "sharp": "^0.33.4",
-    "sonner": "^1.4.41",
+    "sonner": "^1.5.0",
     "tailwind-merge": "^2.3.0",
-    "tailwindcss": "^3.4.3",
     "tailwindcss-animate": "^1.0.7",
     "three": "^0.164.1",
     "ts-node": "^10.9.2",
-    "typescript": "5.4.5",
     "use-file-picker": "^2.1.2",
     "usehooks-ts": "^2.14.0",
     "uuid": "^9.0.1",
     "web-audio-beat-detector": "^8.2.10",
-    "yaml": "^2.4.2",
+    "yaml": "^2.4.5",
     "zustand": "^4.5.2",
-    "zx": "^8.1.2"
+    "zx": "^8.1.3"
   },
   "devDependencies": {
-    "@types/fluent-ffmpeg": "^2.1.24"
+    "@types/fluent-ffmpeg": "^2.1.24",
+    "@types/node": "^20",
+    "@types/react": "^18",
+    "@types/react-dom": "^18",
+    "@types/uuid": "^9.0.8",
+    "eslint": "^8",
+    "eslint-config-next": "14.2.4",
+    "postcss": "^8",
+    "tailwindcss": "^3.4.3",
+    "typescript": "^5"
   }
 }
public/datasets/baby-names-us-year-of-birth-full.csv ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:897ebf724f4c0fd8d408d13e5babdddeee838a409203701dea89287ec52a612a
+size 31419088
src/app/api/assistant/askAnyAssistant.ts CHANGED
@@ -2,7 +2,7 @@

 import { ClapSegmentCategory } from "@aitube/clap"
 import { RunnableLike } from "@langchain/core/runnables"
-import { ChatPromptValueInterface } from "@langchain/core/dist/prompt_values"
+import { ChatPromptValueInterface } from "@langchain/core/prompt_values"
 import { AIMessage, AIMessageChunk, HumanMessage } from "@langchain/core/messages"
 import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts"
 import { StructuredOutputParser } from "@langchain/core/output_parsers"
@@ -60,37 +60,37 @@ export async function askAnyAssistant({
     provider === ComputeProvider.GROQ
     ? new ChatGroq({
         apiKey: settings.groqApiKey,
-        modelName: settings.groqModelForAssistant,
+        modelName: settings.assistantModel,
         // temperature: 0.7,
       })
     : provider === ComputeProvider.OPENAI
     ? new ChatOpenAI({
         openAIApiKey: settings.openaiApiKey,
-        modelName: settings.openaiModelForAssistant,
+        modelName: settings.assistantModel,
         // temperature: 0.7,
       })
     : provider === ComputeProvider.ANTHROPIC
     ? new ChatAnthropic({
         anthropicApiKey: settings.anthropicApiKey,
-        modelName: settings.anthropicModelForAssistant,
+        modelName: settings.assistantModel,
         // temperature: 0.7,
       })
     : provider === ComputeProvider.COHERE
     ? new ChatCohere({
         apiKey: settings.cohereApiKey,
-        model: settings.cohereModelForAssistant,
+        model: settings.assistantModel,
         // temperature: 0.7,
       })
     : provider === ComputeProvider.MISTRALAI
     ? new ChatMistralAI({
         apiKey: settings.mistralAiApiKey,
-        modelName: settings.mistralAiModelForAssistant,
+        modelName: settings.assistantModel,
         // temperature: 0.7,
       })
     : provider === ComputeProvider.GOOGLE
     ? new ChatVertexAI({
         apiKey: settings.googleApiKey,
-        modelName: settings.googleModelForAssistant,
+        modelName: settings.assistantModel,
         // temperature: 0.7,
       })
     : undefined
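
Net effect of this hunk: six per-provider settings (groqModelForAssistant, openaiModelForAssistant, and so on) collapse into one shared settings.assistantModel. For readers unfamiliar with the LangChain classes involved, here is a minimal standalone sketch of the pattern; the model name and prompt are illustrative, not values from this commit:

import { ChatOpenAI } from "@langchain/openai"
import { ChatPromptTemplate } from "@langchain/core/prompts"

async function main() {
  // Illustrative values: in Clapper the model name would come from settings.assistantModel
  const model = new ChatOpenAI({
    openAIApiKey: process.env.OPENAI_API_KEY,
    modelName: "gpt-4o",
  })

  const prompt = ChatPromptTemplate.fromMessages([
    ["system", "You are a screenwriting assistant."],
    ["human", "{input}"],
  ])

  // prompt.pipe(model) yields a RunnableLike, matching the import at the top of this file
  const chain = prompt.pipe(model)
  const response = await chain.invoke({ input: "Suggest a transition between two scenes." })
  console.log(response.content)
}

main()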
src/app/api/resolve/providers/falai/index.ts CHANGED
@@ -36,18 +36,18 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {

   let result: FalAiImageResponse | undefined = undefined

-  if (request.settings.falAiModelForImage === "fal-ai/pulid") {
+  if (request.settings.imageGenerationModel === "fal-ai/pulid") {
     if (!request.prompts.image.identity) {
       // throw new Error(`you selected model ${request.settings.falAiModelForImage}, but no character was found, so skipping`)
       // console.log(`warning: user selected model ${request.settings.falAiModelForImage}, but no character was found. Falling back to fal-ai/fast-sdxl`)

       // dirty fix to fallback to a non-face model
-      request.settings.falAiModelForImage = "fal-ai/fast-sdxl"
+      request.settings.imageGenerationModel = "fal-ai/fast-sdxl"
     }
   }

-  if (request.settings.falAiModelForImage === "fal-ai/pulid") {
-    result = await fal.run(request.settings.falAiModelForImage, {
+  if (request.settings.imageGenerationModel === "fal-ai/pulid") {
+    result = await fal.run(request.settings.imageGenerationModel, {
       input: {
         reference_images: [{
           image_url: request.prompts.image.identity
@@ -60,13 +60,13 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
     }) as FalAiImageResponse

   } else {
-    result = await fal.run(request.settings.falAiModelForImage, {
+    result = await fal.run(request.settings.imageGenerationModel, {
       input: {
         prompt: request.prompts.image.positive,
         image_size: imageSize,
         sync_mode: true,
         num_inference_steps:
-          request.settings.falAiModelForImage === "fal-ai/stable-diffusion-v3-medium"
+          request.settings.imageGenerationModel === "fal-ai/stable-diffusion-v3-medium"
           ? 40
           : 25,
         num_images: 1,
@@ -86,7 +86,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
   } else if (request.segment.category === ClapSegmentCategory.VIDEO) {

     // console.log(`request.settings.falAiModelForVideo = `, request.settings.falAiModelForVideo)
-    if (request.settings.falAiModelForVideo !== "fal-ai/stable-video") {
+    if (request.settings.videoGenerationModel !== "fal-ai/stable-video") {
       throw new Error(`only "fal-ai/stable-video" is supported by Clapper for the moment`)
     }

@@ -94,7 +94,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
     if (!storyboard) {
       throw new Error(`cannot generate a video without a storyboard (the concept of Clapper is to use storyboards)`)
     }
-    const result = await fal.run(request.settings.falAiModelForVideo, {
+    const result = await fal.run(request.settings.videoGenerationModel, {
       input: {
         image_url: storyboard.assetUrl,

@@ -124,7 +124,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
     ||
     request.segment.category === ClapSegmentCategory.MUSIC
   ) {
-    const result = await fal.run(request.settings.falAiModelForSound, {
+    const result = await fal.run(request.settings.soundGenerationModel, {
       input: {
         // note how we use the *segment* prompt for music or sound
         prompt: request.segment.prompt,
@@ -138,7 +138,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
   } else if (
     request.segment.category === ClapSegmentCategory.DIALOGUE
   ) {
-    const result = await fal.run(request.settings.falAiModelForVoice, {
+    const result = await fal.run(request.settings.voiceGenerationModel, {
       input: {
         text: request.segment.prompt,
src/app/api/resolve/providers/huggingface/generateImage.ts CHANGED
@@ -5,8 +5,8 @@ import { ResolveRequest } from "@/types"

 export async function generateImage(request: ResolveRequest): Promise<string> {

-  if (!request.settings.huggingFaceModelForImage) {
-    throw new Error(`HuggingFace.generateImage: cannot generate without a valid huggingFaceModelForImage`)
+  if (!request.settings.imageGenerationModel) {
+    throw new Error(`HuggingFace.generateImage: cannot generate without a valid imageGenerationModel`)
   }

   if (!request.prompts.image.positive) {
@@ -20,7 +20,7 @@ export async function generateImage(request: ResolveRequest): Promise<string> {
   const hf: HfInferenceEndpoint = new HfInference(request.settings.huggingFaceApiKey)

   const blob: Blob = await hf.textToImage({
-    model: request.settings.huggingFaceModelForImage,
+    model: request.settings.imageGenerationModel,
     inputs: request.prompts.image.positive,
     parameters: {
       height: request.meta.height,
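
hf.textToImage resolves to a Blob, while Clapper passes assets around as data URIs (this commit also adds the inverse helper, src/lib/utils/base64DataUriToBlob.ts). A hedged sketch of the Blob-to-data-URI step, assuming a Node.js server runtime; the actual conversion lives elsewhere in the codebase (e.g. decodeOutput):

// Assumption: server-side (Node.js) execution, so Buffer is available.
async function blobToDataUri(blob: Blob): Promise<string> {
  const buffer = Buffer.from(await blob.arrayBuffer())
  const mimeType = blob.type || "image/jpeg" // fall back if the Blob carries no MIME type
  return `data:${mimeType};base64,${buffer.toString("base64")}`
}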
src/app/api/resolve/providers/huggingface/generateVideo.ts CHANGED
@@ -3,8 +3,8 @@ import { callGradioApi } from "@/lib/hf/callGradioApi"

 export async function generateVideo(request: ResolveRequest): Promise<string> {

-  if (!request.settings.huggingFaceModelForVideo) {
-    throw new Error(`HuggingFace.generateVideo: cannot generate without a valid huggingFaceModelForVideo`)
+  if (!request.settings.videoGenerationModel) {
+    throw new Error(`HuggingFace.generateVideo: cannot generate without a valid videoGenerationModel`)
   }

   if (!request.prompts.video.image) {
@@ -16,13 +16,11 @@ export async function generateVideo(request: ResolveRequest): Promise<string> {
   }

   // TODO pass a type to the template function
-  const output = await callGradioApi({
-    url: request.settings.huggingFaceModelForVideo,
+  const assetUrl = await callGradioApi<string>({
+    url: request.settings.videoGenerationModel,
     inputs: request.prompts.video,
     apiKey: request.settings.huggingFaceApiKey
   })

-  console.log(`output from the Gradio API:`, output)
-
-  throw new Error(`please finish me`)
+  return assetUrl
 }
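
The call site now types the result through a generic parameter (callGradioApi<string>). The implementation itself isn't rendered on this page, but given the @gradio/client dependency and the adapter files this commit touches (adaptAnyInputsToGradioInputs.ts, identifyField.ts), its shape is plausibly along these lines — a sketch under stated assumptions, not the actual code:

import { Client } from "@gradio/client"

// Hypothetical reconstruction: the real callGradioApi adapts arbitrary inputs
// to the target space's signature (see src/lib/hf/adapter/ in this commit).
async function callGradioApi<T>({ url, inputs, apiKey }: {
  url: string
  inputs: Record<string, unknown>
  apiKey?: string
}): Promise<T> {
  const client = await Client.connect(url, apiKey ? { hf_token: apiKey as `hf_${string}` } : undefined)
  const result = await client.predict("/predict", inputs)
  // Gradio returns an array of outputs; assume the first entry is the asset we want
  return (Array.isArray(result.data) ? result.data[0] : result.data) as T
}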
src/app/api/resolve/providers/huggingface/generateVoice.ts CHANGED
@@ -4,8 +4,8 @@ import { ResolveRequest } from "@/types"

 export async function generateVoice(request: ResolveRequest): Promise<string> {

-  if (!request.settings.huggingFaceModelForVoice) {
-    throw new Error(`HuggingFace.generateVoice: cannot generate without a valid huggingFaceModelForVoice`)
+  if (!request.settings.voiceGenerationModel) {
+    throw new Error(`HuggingFace.generateVoice: cannot generate without a valid voiceGenerationModel`)
   }

   if (!request.prompts.voice.positive) {
@@ -19,7 +19,7 @@ export async function generateVoice(request: ResolveRequest): Promise<string> {
   const hf: HfInferenceEndpoint = new HfInference(request.settings.huggingFaceApiKey)

   const blob: Blob = await hf.textToSpeech({
-    model: request.settings.huggingFaceModelForVoice,
+    model: request.settings.voiceGenerationModel,
     inputs: request.prompts.voice.positive,
   })
src/app/api/resolve/providers/huggingface/index.ts CHANGED
@@ -24,7 +24,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
   } if (request.segment.category === ClapSegmentCategory.VIDEO) {
     segment.assetUrl = await generateVideo(request)
   } else {
-    throw new Error(`Clapper doesn't support ${request.segment.category} generation for provider "Hugging Face" with model (or space) "${request.settings.huggingFaceModelForVideo}". Please open a pull request with (working code) to solve this!`)
+    throw new Error(`Clapper doesn't support ${request.segment.category} generation for provider "Hugging Face" with model (or space) "${request.settings.videoGenerationModel}". Please open a pull request with (working code) to solve this!`)
   }
   return segment
 }
src/app/api/resolve/providers/index.ts ADDED
@@ -0,0 +1,7 @@
+export { resolveSegment as resolveSegmentUsingHuggingFace } from "./huggingface"
+export { resolveSegment as resolveSegmentUsingComfyReplicate } from "./comfy-replicate"
+export { resolveSegment as resolveSegmentUsingReplicate } from "./replicate"
+export { resolveSegment as resolveSegmentUsingComfyComfyIcu } from "./comfy-comfyicu"
+export { resolveSegment as resolveSegmentUsingFalAi } from "./falai"
+export { resolveSegment as resolveSegmentUsingModelsLab } from "./modelslab"
+export { resolveSegment as resolveSegmentUsingStabilityAi } from "./stabilityai"
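
This new barrel module gives every provider a single import path, consumed by route.ts just below. Adding another provider then only needs one re-export here plus a branch in the dispatch; for example (purely hypothetical — no such module exists in this commit):

export { resolveSegment as resolveSegmentUsingElevenLabs } from "./elevenlabs"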
src/app/api/resolve/providers/replicate/index.ts CHANGED
@@ -20,12 +20,12 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
   // like we are doing for Hugging Face (match the fields etc)
   if (request.segment.category === ClapSegmentCategory.STORYBOARD) {
     let params: object = {}
-    if (request.settings.replicateModelForImage === "fofr/pulid-lightning") {
+    if (request.settings.imageGenerationModel === "fofr/pulid-lightning") {
       params = {
         prompt: request.prompts.image.positive,
         face_image: request.prompts.image.identity,
       }
-    } else if (request.settings.replicateModelForImage === "zsxkib/pulid") {
+    } else if (request.settings.imageGenerationModel === "zsxkib/pulid") {
       params = {
         prompt: request.prompts.image.positive,
         main_face_image: request.prompts.image.identity,
@@ -36,13 +36,13 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
       }
     }
     const response = await replicate.run(
-      request.settings.replicateModelForImage as any,
+      request.settings.imageGenerationModel as any,
       { input: params }
     ) as any
     segment.assetUrl = `${response.output || ""}`
   } else if (request.segment.category === ClapSegmentCategory.DIALOGUE) {
     const response = await replicate.run(
-      request.settings.replicateModelForVoice as any, {
+      request.settings.voiceGenerationModel as any, {
       input: {
         text: request.prompts.voice.positive,
         audio: request.prompts.voice.identity,
@@ -51,7 +51,7 @@ export async function resolveSegment(request: ResolveRequest): Promise<ClapSegment> {
     segment.assetUrl = `${response.output || ""}`
   } else if (request.segment.category === ClapSegmentCategory.VIDEO) {
     const response = await replicate.run(
-      request.settings.replicateModelForVideo as any, {
+      request.settings.videoGenerationModel as any, {
       input: {
         image: request.prompts.video.image,
       }
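
For reference, the replicate SDK call pattern this file relies on, as a standalone sketch. The model identifier and prompt below are placeholders; the `as any` casts above are likely needed because the SDK types the identifier as a template-literal string ("owner/name" or "owner/name:version") while the settings field is a plain string:

import Replicate from "replicate"

async function main() {
  const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN })

  // Placeholder identifier; real values come from settings.imageGenerationModel etc.
  const output = await replicate.run("owner/model-name", {
    input: { prompt: "a film still of a sunrise over the mountains" },
  })
  console.log(output)
}

main()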
src/app/api/resolve/providers/stabilityai/generateImage.ts CHANGED
@@ -8,7 +8,7 @@ export async function generateImage(request: ResolveRequest): Promise<string> {
     throw new Error(`StabilityAI.generateImage: cannot generate without a valid stabilityAiApiKey`)
   }

-  if (!request.settings.stabilityAiModelForImage) {
+  if (!request.settings.imageGenerationModel) {
     throw new Error(`StabilityAI.generateImage: cannot generate without a valid stabilityAiModelForImage`)
   }

@@ -39,7 +39,7 @@ export async function generateImage(request: ResolveRequest): Promise<string> {
   body.set("negative_prompt", `${request.prompts.image.negative || ""}`)
   body.set("aspect_ratio", `${aspectRatio || ""}`)

-  const response = await fetch(`https://api.stability.ai/v2beta/${request.settings.stabilityAiModelForImage}`, {
+  const response = await fetch(`https://api.stability.ai/v2beta/${request.settings.imageGenerationModel}`, {
     method: "POST",
     headers: {
       Authorization: `Bearer ${request.settings.stabilityAiApiKey}`,
src/app/api/resolve/providers/stabilityai/generateVideo.ts CHANGED
@@ -8,8 +8,8 @@ export async function generateVideo(request: ResolveRequest): Promise<string> {
     throw new Error(`StabilityAI.generateVideo: cannot generate without a valid stabilityAiApiKey`)
   }

-  if (!request.settings.stabilityAiModelForVideo) {
-    throw new Error(`StabilityAI.generateVideo: cannot generate without a valid stabilityAiModelForVideo`)
+  if (!request.settings.videoGenerationModel) {
+    throw new Error(`StabilityAI.generateVideo: cannot generate without a valid videoGenerationModel`)
   }

src/app/api/resolve/route.ts CHANGED
@@ -1,13 +1,15 @@
 import { NextResponse, NextRequest } from "next/server"
 import { ClapOutputType, ClapSegment, ClapSegmentCategory, ClapSegmentStatus, getClapAssetSourceType } from "@aitube/clap"

-import { resolveSegment as resolveSegmentUsingHuggingFace } from "./providers/huggingface"
-import { resolveSegment as resolveSegmentUsingComfyReplicate } from "./providers/comfy-replicate"
-import { resolveSegment as resolveSegmentUsingReplicate } from "./providers/replicate"
-import { resolveSegment as resolveSegmentUsingComfyComfyIcu } from "./providers/comfy-comfyicu"
-import { resolveSegment as resolveSegmentUsingFalAi } from "./providers/falai"
-import { resolveSegment as resolveSegmentUsingModelsLab } from "./providers/modelslab"
-import { resolveSegment as resolveSegmentUsingStabilityAi } from "./providers/stabilityai"
+import {
+  resolveSegmentUsingHuggingFace,
+  resolveSegmentUsingComfyReplicate,
+  resolveSegmentUsingReplicate,
+  resolveSegmentUsingComfyComfyIcu,
+  resolveSegmentUsingFalAi,
+  resolveSegmentUsingModelsLab,
+  resolveSegmentUsingStabilityAi
+} from "./providers"

 import { ComputeProvider, ResolveRequest } from "@/types"
 import { decodeOutput } from "@/lib/utils/decodeOutput"
@@ -21,7 +23,7 @@ export async function POST(req: NextRequest) {
   // console.log(`TODO Julian: secure the endpoint`)
   // await throwIfInvalidToken(req.headers.get("Authorization"))
   const request = (await req.json()) as ResolveRequest
-
+
   const provider =
     request.segment.category === ClapSegmentCategory.STORYBOARD
     ? request.settings.imageProvider
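
The dispatch is truncated in this view; based on the per-category provider settings visible elsewhere in this commit (imageProvider, videoProvider, voiceProvider, musicProvider, soundProvider), the full selection plausibly continues like this hypothetical reconstruction:

import { ClapSegmentCategory } from "@aitube/clap"
import { ComputeProvider, ResolveRequest } from "@/types"

// Hypothetical reconstruction of the truncated dispatch, inferred from the
// per-category provider settings this commit uses elsewhere.
function pickProvider(request: ResolveRequest): ComputeProvider {
  return request.segment.category === ClapSegmentCategory.STORYBOARD
    ? request.settings.imageProvider
    : request.segment.category === ClapSegmentCategory.VIDEO
    ? request.settings.videoProvider
    : request.segment.category === ClapSegmentCategory.DIALOGUE
    ? request.settings.voiceProvider
    : request.segment.category === ClapSegmentCategory.MUSIC
    ? request.settings.musicProvider
    : request.settings.soundProvider
}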
src/app/main.tsx CHANGED
@@ -22,6 +22,7 @@ import { TopBar } from "@/components/toolbars/top-bar"
 import { Timeline } from "@/components/core/timeline"
 import { useIO } from "@/controllers/io/useIO"
 import { ChatView } from "@/components/assistant/ChatView"
+import { ScriptEditor } from "@/components/editor/ScriptEditor"
 import { useSearchParams } from "next/navigation"

 type DroppableThing = { files: File[] }
@@ -86,7 +87,23 @@ function MainContent() {
           <ReflexElement
             minSize={showTimeline ? 100 : 1}
           >
-            <Monitor />
+            <ReflexContainer orientation="vertical">
+              <ReflexElement
+                size={showTimeline ? 400 : 1}
+                minSize={showTimeline ? 100 : 1}
+                maxSize={showTimeline ? 1600 : 1}
+              >
+                <ScriptEditor />
+              </ReflexElement>
+              <ReflexSplitter />
+              <ReflexElement
+                minSize={showTimeline ? 100 : 1}
+              >
+                <Monitor />
+              </ReflexElement>
+            </ReflexContainer>
           </ReflexElement>
           <ReflexSplitter />
           <ReflexElement
src/components/{core/ScriptEditor/index.FOR_LATER → editor/ScriptEditor/index.tsx} RENAMED
@@ -1,6 +1,10 @@
-import { useEffect, useState } from "react"
+import React, { useEffect, useState } from "react"
 import MonacoEditor from "monaco-editor"
 import Editor from "@monaco-editor/react"
+import { DEFAULT_DURATION_IN_MS_PER_STEP, TimelineStore, useTimeline } from "@aitube/timeline"
+
+import { useEditor } from "@/controllers/editor/useEditor"
+import { useRenderer } from "@/controllers/renderer"

 const beforeMount = ({ editor }: { editor: typeof MonacoEditor.editor }) => {
   // Define a custom theme with the provided color palette
@@ -11,15 +15,15 @@ const beforeMount = ({ editor }: { editor: typeof MonacoEditor.editor }) => {
     // You can define token-specific styles here if needed
   ],
   colors: {
-    'editor.background': '#111827', // Editor background color (given)
-    'editorCursor.foreground': '#e5e7eb', // Cursor color
-    'editor.lineHighlightBackground': '#374151', // Highlighted line color
-    'editorLineNumber.foreground': '#6b7280', // Line Numbers color
-    'editor.selectionBackground': '#2c333c', // Selection color
-    'editor.foreground': '#d1d5db', // Main text color
-    'editorIndentGuide.background': '#4b5563', // Indent guides color
-    'editorIndentGuide.activeBackground': '#6b7280', // Active indent guides color
-    'editorWhitespace.foreground': '#3b4049', // Whitespace symbols color
+    'editor.background': '#292524', // Editor background color (given)
+    'editorCursor.foreground': '#f5f5f4', // Cursor color
+    'editor.lineHighlightBackground': '#44403c', // Highlighted line color
+    'editorLineNumber.foreground': '#78716c', // Line Numbers color
+    'editor.selectionBackground': '#44403c', // Selection color
+    'editor.foreground': '#d6d3d1', // Main text color
+    'editorIndentGuide.background': '#78716c', // Indent guides color
+    'editorIndentGuide.activeBackground': '#a8a29e', // Active indent guides color
+    'editorWhitespace.foreground': '#a8a29e', // Whitespace symbols color
     // Add more color overrides if needed here
   },
 })
@@ -29,14 +33,57 @@
 }

 export function ScriptEditor() {
-  const [editor, setEditor] = useState<MonacoEditor.editor.IStandaloneCodeEditor>()
-  const script = useApp(state => state.script)
-  const rewriteCurrentScript = useApp(state => state.rewriteCurrentScript)
+  const editor = useEditor(s => s.editor)
+  const setEditor = useEditor(s => s.setEditor)
+  const draft = useEditor(s => s.draft)
+  const setDraft = useEditor(s => s.setDraft)
+  const loadDraftFromClap = useEditor(s => s.loadDraftFromClap)
+  const onDidScrollChange = useEditor(s => s.onDidScrollChange)
+  const jumpCursorOnLineClick = useEditor(s => s.jumpCursorOnLineClick)
+
+  // this is an expensive function, we should only call it on blur or on click on a "save button maybe"
+  const publishDraftToTimeline = useEditor(s => s.publishDraftToTimeline)
+
+  const clap = useTimeline((s: TimelineStore) => s.clap)
+  const cursorTimestampAtInMs = useTimeline(s => s.cursorTimestampAtInMs)
+  const totalDurationInMs = useTimeline(s => s.totalDurationInMs)
+  const scrollX = useTimeline(s => s.scrollX)
+  const contentWidth = useTimeline(s => s.contentWidth)
+
+  useEffect(() => { loadDraftFromClap(clap) }, [clap])
+
+  const scrollTop = useEditor(s => s.scrollTop)
+  const scrollLeft = useEditor(s => s.scrollLeft)
+  const scrollWidth = useEditor(s => s.scrollWidth)
+  const scrollHeight = useEditor(s => s.scrollHeight)
+
+  /*
+  const script = useTimeline(state => state.script)
+
   const isPlaying = useApp(state => state.isPlaying)
   const setCursorAt = useApp((state) => state.setCursorAt)
   const [scriptContent, setScriptContent] = useState("")
+  */
+
+  const currentSegment = useRenderer(s => s.currentSegment)

-  const activeSegments = useApp(state => state.activeSegments)
+  const activeStartTimeInLines = currentSegment?.startTimeInLines
+
+  useEffect(() => {
+    console.log("activeStartTimeInLines:", activeStartTimeInLines)
+
+  }, [activeStartTimeInLines])
+
+  useEffect(() => {
+    console.log("scrollX:", scrollX)
+
+  }, [scrollX])
+
+  /*
+  const activeSceneLineNumber = (activeScene?.startAtLine || 0)
+  */
+
+  /*
   const stepsToPreviews = useApp(state => state.stepsToPreviews)

   const screenplayScroll = useInterface(state => state.screenplayScroll)
@@ -48,6 +95,8 @@ export function ScriptEditor() {

   // console.log("linesToPreview:", linesToPreviews)

+  */
+  /*
   useEffect(() => {
     if (editor && leftmostVisibleScene) {
       // console.log("ScriptEditor: timelineScrollLeftInStep changed to scene " + leftmostVisibleScene.line)
@@ -58,29 +107,10 @@ export function ScriptEditor() {
       editor.revealLineInCenter(lineNumber)
     }
   }, [editor, leftmostVisibleScene])
+  */


-  const activeScene = activeSegments
-    .find(s => s.category === "video")?.scene
-
-  const activeSceneLineNumber = (activeScene?.startAtLine || 0)
-
-  useEffect(() => {
-    const fn = async () => {
-      // support both text and Blob
-      let content = ""
-      if (typeof script.content !== "string") {
-        content = await script.content.text()
-      } else {
-        content = script.content
-      }
-      editor?.setValue(content)
-      setScriptContent(content)
-    }
-    fn()
-
-  }, [editor, script.content])
-
+  /*
   useEffect(() => {
     if (editor && activeSceneLineNumber) {
       // console.log("useEffect:", activeSceneLineNumber)
@@ -107,7 +137,7 @@ export function ScriptEditor() {
       }
     }
   }, [editor, activeSceneLineNumber])
-
+  */
   const onMount = (editor: MonacoEditor.editor.IStandaloneCodeEditor) => {
     const model = editor.getModel()
     if (!model) { return }
@@ -115,39 +145,14 @@ export function ScriptEditor() {
     setEditor(editor)

     editor.onMouseDown((e) => {
-      const currentPosition = editor.getPosition()
-
-      const line = currentPosition?.lineNumber
-      if (typeof line !== "number") { return }
-
-      // so... due to how monaco callbacks work, we cannot use the hook context
-      // to get the linesToPreview.. but that's okay!
-      const linesToPreviews = useApp.getState().linesToPreviews
-
-      const startTimeInSteps = linesToPreviews[line]?.startTimeInSteps
-      if (typeof startTimeInSteps !== "number") { return }
-
-      setCursorAt(startTimeInSteps * DEFAULT_DURATION_IN_MS_PER_STEP)
+      jumpCursorOnLineClick(editor.getPosition()?.lineNumber)
     })

     editor.onDidScrollChange(({ scrollTop, scrollLeft, scrollWidth, scrollHeight }: MonacoEditor.IScrollEvent) => {
-      /*if (scrollHeight !== screenplayScroll.scrollHeight &&
-        scrollLeft !== screenplayScroll.scrollLeft &&
-        scrollTop !== screenplayScroll.scrollTop &&
-        scrollWidth !== screenplayScroll.scrollWidth) {
-      */
-      // console.log(`ScriptEditor:onDidScrollChange(${JSON.stringify({ scrollTop, scrollLeft, scrollWidth, scrollHeight }, null, 2)})`)
-      setScreenplayScroll({
-        shouldRerender: false,
-        scrollTop,
-        scrollLeft,
-        scrollWidth,
-        scrollHeight
-      })
-      //}
-
-      // TODO we need to grab the leftmost segment
-      // now the problem is that this might be a bit costly to do
+      onDidScrollChange(
+        { scrollTop, scrollLeft, scrollWidth, scrollHeight },
+        true // <- set to true to ignore the change and avoid an infinite loop
+      )
     })

     // as an optimization we can use this later, for surgical edits,
@@ -162,14 +167,7 @@ export function ScriptEditor() {
   }

   const onChange = (plainText?: string) => {
-    if (!plainText) { return }
-
-    if (plainText === scriptContent) { return }
-
-    console.log("generic onChange:")
-    // this function is currently *very* expensive
-    // the only optimization right now is that we debounce it
-    rewriteCurrentScript(plainText)
+    // setDraft(plainText || "")
   }

   return (
@@ -177,13 +175,13 @@ export function ScriptEditor() {
     <Editor
       height="100%"
       defaultLanguage="plaintext"
-      defaultValue={scriptContent}
+      defaultValue={draft}
       beforeMount={beforeMount}
       onMount={onMount}
      onChange={onChange}
       theme="customTheme"
       options={{
-        fontSize: 14
+        fontSize: 12
       }}
     />
   </div>
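
From the selectors used above, the new editor controller added by this commit (src/controllers/editor/types.ts, useEditor.ts, getDefaultEditorState.ts) plausibly exposes a zustand store shaped roughly like this — a hypothetical reconstruction from the call sites, not the actual types.ts:

import MonacoEditor from "monaco-editor"
import type { ClapProject } from "@aitube/clap"

// Hypothetical reconstruction, inferred from how ScriptEditor consumes the store.
interface EditorState {
  editor?: MonacoEditor.editor.IStandaloneCodeEditor
  draft: string
  scrollTop: number
  scrollLeft: number
  scrollWidth: number
  scrollHeight: number
}

interface EditorControls {
  setEditor: (editor?: MonacoEditor.editor.IStandaloneCodeEditor) => void
  setDraft: (draft: string) => void
  loadDraftFromClap: (clap: ClapProject) => void
  publishDraftToTimeline: () => Promise<void>
  jumpCursorOnLineClick: (line?: number) => void
  onDidScrollChange: (
    scroll: { scrollTop: number; scrollLeft: number; scrollWidth: number; scrollHeight: number },
    ignoreChange?: boolean
  ) => void
}

export type EditorStore = EditorState & EditorControls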
src/components/monitor/DynamicPlayer/StoryboardBuffer.tsx CHANGED
@@ -16,11 +16,16 @@ export function StoryboardBuffer({
       `absolute`,
       `h-full rounded-md overflow-hidden`,

-      // iseally we could only use the ease-out and duration-150
+      // ideally we could only use the ease-out and duration-150
       // to avoid a weird fade to grey,
       // but the ease out also depends on which video is on top of each other,
       // in term of z-index, so we should also intervert this
       `transition-all duration-100 ease-out`,
+
+      // yeah well, I couldn't find a better name
+      // (except maybe aspect-128/135, but is that really more familiar?)
+      `object-cover aspect-1024/576`,
+
       className
     )}
     src={src}
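
Note that aspect-1024/576 is not a stock Tailwind utility (the built-in arbitrary-value form would be aspect-[1024/576]), so it presumably maps to the four lines this commit adds to tailwind.config.js, roughly along these hypothetical lines:

// Hypothetical reconstruction — the actual tailwind.config.js hunk is not rendered on this page.
module.exports = {
  theme: {
    extend: {
      aspectRatio: { "1024/576": "1024 / 576" },
    },
  },
}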
src/components/monitor/DynamicPlayer/index.tsx CHANGED
@@ -75,7 +75,9 @@ export const DynamicPlayer = ({
   ])

   return (
-    <div className={cn(`@container flex flex-col items-center flex-grow w-full`, className)}>
+    <div className={cn(`
+      @container flex flex-col items-center flex-grow w-full
+      `, className)}>
       <DynamicBuffer
         segment={dataUriBuffer1}
         isPlaying={isPlaying}
src/components/settings/constants.ts CHANGED
@@ -270,7 +270,7 @@ export const availableModelsForVideoGeneration: Partial<Record<ComputeProvider,
   ],
   [ComputeProvider.HUGGINGFACE]: [
     "spaces/hpcai-tech/open-sora",
-    "spaces/multimodalart/hallo" // supports audio input
+    "spaces/jbilcke-hf/hallo-api" // supports audio input
   ],
   [ComputeProvider.REPLICATE]: [
     // note: we need a model that accepts cinematic ratios
src/components/settings/image.tsx CHANGED
@@ -12,6 +12,8 @@ export function SettingsSectionImage() {
   const imageProvider = useSettings(s => s.imageProvider)
   const setImageProvider = useSettings(s => s.setImageProvider)

+  /*
+  to deprecate I think - or replace by defaultHuggingFaceModelForXXXX
   const huggingFaceModelForImage = useSettings(s => s.huggingFaceModelForImage)
   const setHuggingFaceModelForImage = useSettings(s => s.setHuggingFaceModelForImage)

@@ -23,6 +25,7 @@ export function SettingsSectionImage() {

   const modelsLabModelForImage = useSettings(s => s.modelsLabModelForImage)
   const setModelsLabModelForImage = useSettings(s => s.setModelsLabModelForImage)
+  */

   const imagePromptPrefix = useSettings(s => s.imagePromptPrefix)
   const setImagePromptPrefix = useSettings(s => s.setImagePromptPrefix)
@@ -100,6 +103,7 @@ export function SettingsSectionImage() {

   </>
   : // "proprietary" parameters
+  null/*
   <>
     {imageProvider === ComputeProvider.HUGGINGFACE && <FormInput
       label="HF Model ID (must be compatible with the Inference API)"
@@ -125,7 +129,9 @@ export function SettingsSectionImage() {
       defaultValue={defaultSettings.modelsLabModelForImage}
       onChange={setModelsLabModelForImage}
     />}
-  </>}
+  </>
+  */
+  }

   </FormSection>
 </div>
src/components/settings/music.tsx CHANGED
@@ -12,6 +12,7 @@ export function SettingsSectionMusic() {
   const musicProvider = useSettings(s => s.musicProvider)
   const setMusicProvider = useSettings(s => s.setMusicProvider)

+  /*
   const huggingFaceModelForMusic = useSettings(s => s.huggingFaceModelForMusic)
   const setHuggingFaceModelForMusic = useSettings(s => s.setHuggingFaceModelForMusic)

@@ -23,6 +24,7 @@ export function SettingsSectionMusic() {

   const modelsLabModelForMusic = useSettings(s => s.modelsLabModelForMusic)
   const setModelsLabModelForMusic = useSettings(s => s.setModelsLabModelForMusic)
+  */

   const comfyWorkflowForMusic = useSettings(s => s.comfyWorkflowForMusic)
   const setComfyWorkflowForMusic = useSettings(s => s.setComfyWorkflowForMusic)
@@ -55,6 +57,7 @@ export function SettingsSectionMusic() {
     onChange={setComfyWorkflowForMusic}
   />
   : // "proprietary" parameters
+  null /*
   <>
     {musicProvider === ComputeProvider.HUGGINGFACE && <FormInput
       label="HF Model ID (must be compatible with the Inference API)"
@@ -80,7 +83,8 @@ export function SettingsSectionMusic() {
       defaultValue={defaultSettings.modelsLabModelForMusic}
       onChange={setModelsLabModelForMusic}
     />}
-  </>}
+  </>
+  */}
   </FormSection>
 </div>
 )
src/components/settings/sound.tsx CHANGED
@@ -12,6 +12,7 @@ export function SettingsSectionSound() {
   const soundProvider = useSettings(s => s.soundProvider)
   const setSoundProvider = useSettings(s => s.setSoundProvider)

+  /*
   const huggingFaceModelForSound = useSettings(s => s.huggingFaceModelForSound)
   const setHuggingFaceModelForSound = useSettings(s => s.setHuggingFaceModelForSound)

@@ -23,7 +24,7 @@ export function SettingsSectionSound() {

   const modelsLabModelForSound = useSettings(s => s.modelsLabModelForSound)
   const setModelsLabModelForSound = useSettings(s => s.setModelsLabModelForSound)
-
+  */
   const comfyWorkflowForSound = useSettings(s => s.comfyWorkflowForSound)
   const setComfyWorkflowForSound = useSettings(s => s.setComfyWorkflowForSound)

@@ -55,6 +56,7 @@ export function SettingsSectionSound() {
     onChange={setComfyWorkflowForSound}
   />
   : // "proprietary" parameters
+  null /*
   <>
     {soundProvider === ComputeProvider.HUGGINGFACE && <FormInput
       label="HF Model ID (must be compatible with the Inference API)"
@@ -80,7 +82,8 @@ export function SettingsSectionSound() {
       defaultValue={defaultSettings.modelsLabModelForSound}
       onChange={setModelsLabModelForSound}
     />}
-  </>}
+  </>
+  */}
   </FormSection>
 </div>
 )
src/components/settings/video.tsx CHANGED
@@ -9,6 +9,7 @@
 export function SettingsSectionVideo() {
   const defaultSettings = getDefaultSettingsState()

+  /*
   const huggingFaceModelForVideo = useSettings(s => s.huggingFaceModelForVideo)
   const setHuggingFaceModelForVideo = useSettings(s => s.setHuggingFaceModelForVideo)

@@ -20,7 +21,7 @@ export function SettingsSectionVideo() {

   const modelsLabModelForVideo = useSettings(s => s.modelsLabModelForVideo)
   const setModelsLabModelForVideo = useSettings(s => s.setModelsLabModelForVideo)
-
+  */

   const videoPromptPrefix = useSettings(s => s.videoPromptPrefix)
   const setVideoPromptPrefix = useSettings(s => s.setVideoPromptPrefix)
@@ -118,7 +119,7 @@ export function SettingsSectionVideo() {
     onChange={setComfyWorkflowForVideo}
   />
   : // "proprietary" parameters
-  <>
+  null /* <>
     {videoProvider === ComputeProvider.HUGGINGFACE && <FormInput
       label="HF Model ID (must be compatible with the Inference API)"
       value={huggingFaceModelForVideo}
@@ -143,7 +144,8 @@ export function SettingsSectionVideo() {
       defaultValue={defaultSettings.modelsLabModelForVideo}
       onChange={setModelsLabModelForVideo}
     />}
-  </>}
+  </>
+  */}

   </FormSection>
 </div>
src/components/settings/voice.tsx CHANGED
@@ -12,6 +12,7 @@ export function SettingsSectionVoice() {
   const voiceProvider = useSettings(s => s.voiceProvider)
   const setVoiceProvider = useSettings(s => s.setVoiceProvider)

+  /*
   const huggingFaceModelForVoice = useSettings(s => s.huggingFaceModelForVoice)
   const setHuggingFaceModelForVoice = useSettings(s => s.setHuggingFaceModelForVoice)

@@ -23,6 +24,7 @@ export function SettingsSectionVoice() {

   const modelsLabModelForVoice = useSettings(s => s.modelsLabModelForVoice)
   const setModelsLabModelForVoice = useSettings(s => s.setModelsLabModelForVoice)
+  */

   const comfyWorkflowForVoice = useSettings(s => s.comfyWorkflowForVoice)
   const setComfyWorkflowForVoice = useSettings(s => s.setComfyWorkflowForVoice)
@@ -55,7 +57,7 @@ export function SettingsSectionVoice() {
     onChange={setComfyWorkflowForVoice}
   />
   : // "proprietary" parameters
-  <>
+  null /* <>
     {voiceProvider === ComputeProvider.HUGGINGFACE && <FormInput
       label="HF Model ID (must be compatible with the Inference API)"
       value={huggingFaceModelForVoice}
@@ -80,7 +82,8 @@ export function SettingsSectionVoice() {
       defaultValue={defaultSettings.modelsLabModelForVoice}
       onChange={setModelsLabModelForVoice}
     />}
-  </>}
+  </>
+  */}
   </FormSection>
 </div>
 )
src/controllers/audio/startAudioSourceNode.ts CHANGED
@@ -40,11 +40,23 @@ export function startAudioSourceNode({
  * So make sure it uses fresh data when it is finally executed
  */
  onEnded: (sourceId: string) => void
- }): CurrentlyPlayingAudioSource {
+ }): CurrentlyPlayingAudioSource | undefined {
  if (!segment.audioBuffer) {
- throw new Error(`Cannot playAudioBuffer on non-audio segments`)
+ // throw new Error(`startAudioSourceNode: cannot play a non-audio segment`)
+ // console.error(`startAudioSourceNode: cannot play a non-audio segment`)
+ return
  }

+ // well, we can't play the segment if the playback cursor is out of its range
+ if (cursorTimestampAtInMs < segment.startTimeInMs || segment.endTimeInMs < cursorTimestampAtInMs) {
+   // console.error(`startAudioSourceNode: cannot play a segment which is not crossing the current cursor`)
+   return
+ }
+
+ const startTimeInMs = Math.max(0, cursorTimestampAtInMs - segment.startTimeInMs)
+
+ // console.log(`startAudioSourceNode: ${startTimeInMs}ms`)
+
  // const audioContext = new AudioContext() // initialize AudioContext

  // Get an AudioBufferSourceNode.

@@ -58,10 +70,10 @@
  const gainNode: GainNode = audioContext.createGain()

- if (isFinite(segment.outputGain)) {
+ if (!isNaN(segment.outputGain) && isFinite(segment.outputGain)) {
  gainNode.gain.value = segment.outputGain
  } else {
- console.log(`segment.outputGain isn't finite for some reason? (got value ${segment.outputGain})`)
+ // console.error(`segment.outputGain isn't finite for some reason? (got value ${segment.outputGain})`)
  gainNode.gain.value = 1.0
  }

@@ -71,11 +83,8 @@
  // connect the gain node to the destination
  gainNode.connect(audioContext.destination)

- // make sure we play the segment at a specific time
- const startTimeInMs = cursorTimestampAtInMs - segment.startTimeInMs
-
  // convert milliseconds to seconds by dividing by 1000
- source.start(audioContext.currentTime, startTimeInMs >= 1000 ? (startTimeInMs / 1000) : 0)
+ source.start(audioContext.currentTime, startTimeInMs / 1000)

  const currentlyPlaying: CurrentlyPlayingAudioSource = {
  sourceId: UUID(),
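
For reference, a minimal sketch of the Web Audio scheduling pattern the change above relies on: AudioBufferSourceNode.start(when, offset) takes both arguments in seconds, so the millisecond-based cursor offset has to be divided by 1000 (function and variable names here are illustrative, not from the codebase):

    // minimal sketch: play `buffer` as if playback had already advanced
    // `cursorMs` milliseconds into the segment
    function playFromOffset(audioContext: AudioContext, buffer: AudioBuffer, cursorMs: number): AudioBufferSourceNode {
      const source = audioContext.createBufferSource()
      source.buffer = buffer
      source.connect(audioContext.destination)
      // start(when, offset): both arguments are expressed in seconds
      source.start(audioContext.currentTime, Math.max(0, cursorMs) / 1000)
      return source
    }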
src/controllers/audio/useAudio.ts CHANGED
@@ -3,9 +3,10 @@
  import { create } from "zustand"
  import { TimelineStore, useTimeline, RuntimeSegment } from "@aitube/timeline"

- import { AudioStore } from "./types"
+ import { AudioStore, CurrentlyPlayingAudioSource } from "./types"
  import { getDefaultAudioState } from "./getDefaultAudioState"
  import { startAudioSourceNode } from "./startAudioSourceNode"
+ import { useRenderer } from "../renderer"

  export const useAudio = create<AudioStore>((set, get) => ({
  ...getDefaultAudioState(),

@@ -14,7 +15,7 @@ export const useAudio = create<AudioStore>((set, get) => ({
  // console.log("useAudio: play()")
  const { isPlaying, currentlyPlaying } = get()
  if (isPlaying) { return }
- currentlyPlaying.forEach(p => { p.sourceNode.start() })
+ currentlyPlaying.forEach(p => p.sourceNode.start())
  },
  stop: () => {
  // console.log("useAudio: stop()")

@@ -59,7 +60,7 @@
  * @returns
  */
  syncAudioToCurrentCursorPosition: (activeAudioSegments: RuntimeSegment[]) => {
- // console.log("useAudio: syncAudioToCurrentCursorPosition()")
+
  const { audioContext, currentlyPlaying } = get()

  const timelineStore: TimelineStore = useTimeline.getState()

@@ -89,7 +90,7 @@
  })
  }
  })
- )
+ ).filter(s => s) as CurrentlyPlayingAudioSource[]

  set({
  currentlyPlaying: [
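
Since startAudioSourceNode can now return undefined, the mapped results have to be narrowed before being stored; a type-guard predicate would be a slightly stricter alternative to the `as` cast used above (a sketch, not code from this commit):

    // sketch: narrow (T | undefined)[] to T[] without a cast
    function compact<T>(items: (T | undefined)[]): T[] {
      return items.filter((item): item is T => item !== undefined)
    }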
src/controllers/editor/getDefaultEditorState.ts ADDED
@@ -0,0 +1,18 @@
+ import { EditorState } from "./types"
+
+ export function getDefaultEditorState(): EditorState {
+   const state: EditorState = {
+     editor: undefined,
+     draft: "",
+     lineNumberToMentionedSegments: {},
+
+     scrollChanges: 0,
+     scrollHeight: 0,
+     scrollLeft: 0,
+     scrollTop: 0,
+     scrollWidth: 0,
+     scrollTopInMs: 0,
+   }
+
+   return state
+ }
src/controllers/editor/types.ts ADDED
@@ -0,0 +1,41 @@
+ import { ClapProject, ClapSegment } from "@aitube/clap"
+ import MonacoEditor from "monaco-editor"
+
+ export type ScrollData = {
+   scrollHeight: number
+   scrollLeft: number
+   scrollTop: number
+   scrollWidth: number
+ }
+
+ export type EditorState = {
+   // reference to the React component
+   editor?: MonacoEditor.editor.IStandaloneCodeEditor
+
+   // the full text of the screenplay
+   draft: string
+
+   // map screenplay lines to dialogue segments
+   // (note: some lines point to nothing, e.g. when we have empty spaces)
+   lineNumberToMentionedSegments: Record<number, ClapSegment>
+
+   scrollChanges: number
+
+   /**
+    * the index of the first step visible in the current screenplay
+    *
+    * (the topmost visible timeline step in the current timeline)
+    */
+   scrollTopInMs: number
+ } & ScrollData
+
+ export type EditorControls = {
+   setEditor: (editor?: MonacoEditor.editor.IStandaloneCodeEditor) => void
+   loadDraftFromClap: (clap: ClapProject) => void
+   setDraft: (draft: string) => void
+   publishDraftToTimeline: () => Promise<void>
+   onDidScrollChange: (scrollData: ScrollData, ignoreChange?: boolean) => void
+   jumpCursorOnLineClick: (line?: number) => void
+ }
+
+ export type EditorStore = EditorState & EditorControls
src/controllers/editor/useEditor.ts ADDED
@@ -0,0 +1,110 @@
+ "use client"
+
+ import MonacoEditor from "monaco-editor"
+ import { create } from "zustand"
+
+ import { EditorStore, ScrollData } from "./types"
+ import { getDefaultEditorState } from "./getDefaultEditorState"
+ import { ClapProject, ClapSegment, ClapSegmentCategory } from "@aitube/clap"
+ import { TimelineStore, useTimeline, leftBarTrackScaleWidth } from "@aitube/timeline"
+
+ export const useEditor = create<EditorStore>((set, get) => ({
+   ...getDefaultEditorState(),
+   setEditor: (editor?: MonacoEditor.editor.IStandaloneCodeEditor) => { set({ editor }) },
+   loadDraftFromClap: (clap: ClapProject) => {
+     const { setDraft } = get()
+
+     setDraft(clap.meta.screenplay)
+   },
+   setDraft: (draft: string) => {
+     const { draft: previousDraft } = get()
+     if (draft === previousDraft) { return }
+     set({ draft })
+
+     const { editor } = get()
+     if (!editor) { return }
+     editor.setValue(draft)
+   },
+   publishDraftToTimeline: async (): Promise<void> => {
+     const { draft } = get()
+     console.log(`user asked to update the whole scene! this is expensive..`)
+     // we can do something smart, which is to only reconstruct the impacted segments
+     // and shift the rest along the time axis, without modifying it
+   },
+   onDidScrollChange: ({
+     scrollHeight,
+     scrollLeft,
+     scrollTop,
+     scrollWidth
+   }: ScrollData, ignoreChange = false) => {
+     const {
+       scrollHeight: previousScrollHeight,
+       scrollLeft: previousScrollLeft,
+       scrollTop: previousScrollTop,
+       scrollWidth: previousScrollWidth,
+       scrollChanges
+     } = get()
+
+     // skip if nothing changed
+     if (
+       scrollHeight === previousScrollHeight &&
+       scrollLeft === previousScrollLeft &&
+       scrollTop === previousScrollTop &&
+       scrollWidth === previousScrollWidth
+     ) {
+       return
+     }
+
+     set({
+       scrollHeight,
+       scrollLeft,
+       scrollTop,
+       scrollWidth,
+
+       // optionally mark the state as stale
+       scrollChanges: scrollChanges + (ignoreChange ? 0 : 1),
+     })
+
+     const timeline: TimelineStore = useTimeline.getState()
+     if (!timeline.timelineCamera || !timeline.timelineControls) { return }
+
+     const { editor } = get()
+
+     const scrollRatio = scrollTop / scrollHeight
+     const scrollX = leftBarTrackScaleWidth + scrollRatio * timeline.contentWidth
+     console.log({
+       scrollHeight,
+       scrollLeft,
+       scrollTop,
+       scrollWidth,
+       scrollRatio,
+       scrollX
+     })
+     useTimeline.setState({ scrollX })
+     timeline.timelineCamera.position.setX(scrollX)
+     timeline.timelineControls.target.setX(scrollX)
+   },
+   jumpCursorOnLineClick: (line?: number) => {
+     if (typeof line !== "number") { return }
+     const timeline: TimelineStore = useTimeline.getState()
+
+     const { lineNumberToMentionedSegments } = timeline
+
+     const mentionedSegments = lineNumberToMentionedSegments[line] || []
+
+     const firstMentionedSegment = mentionedSegments.at(0)
+
+     if (typeof firstMentionedSegment?.startTimeInMs !== "number") { return }
+
+     const { startTimeInMs } = firstMentionedSegment
+
+     timeline.setCursorTimestampAtInMs(startTimeInMs)
+   },
+ }))
+
+ if (typeof window !== "undefined") {
+   (window as any).useEditor = useEditor
+ }
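
The scroll synchronization above boils down to a linear mapping from the editor's vertical scroll to the timeline's horizontal axis; a pure-function sketch of that mapping (parameter names are illustrative):

    // sketch: map the screenplay editor's vertical scroll position
    // to a horizontal timeline coordinate
    function editorScrollToTimelineX(
      scrollTop: number,
      scrollHeight: number,
      contentWidth: number,
      leftBarWidth: number
    ): number {
      const scrollRatio = scrollHeight > 0 ? scrollTop / scrollHeight : 0
      return leftBarWidth + scrollRatio * contentWidth
    }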
src/controllers/io/useIO.ts CHANGED
@@ -2,6 +2,7 @@
  import { ClapProject, ClapSegment, ClapSegmentCategory, ClapSegmentStatus, getClapAssetSourceType, newSegment, parseClap, serializeClap } from "@aitube/clap"
  import { TimelineStore, useTimeline } from "@aitube/timeline"
+ import { parseScriptToClap } from "@aitube/broadway"
  import { create } from "zustand"
  import { mltToXml } from "mlt-xml"

@@ -98,20 +99,25 @@ export const useIO = create<IOStore>((set, get) => ({
  message: "Analyzing screenplay..",
  value: 10
  })
- try {
- const res = await fetch("https://jbilcke-hf-broadway-api.hf.space", {
- method: "POST",
- headers: { 'Content-Type': 'text/plain' },
- body: plainText,
- })
- const blob = await res.blob()
- task.setProgress({
- message: "Loading scenes..",
- value: 50
- })
- // TODO: parseClap should feature a progress callback
- const clap = await parseClap(blob)
+ try {
+ // this is the old way, based on a call to a separate API hosted on HF
+ // obviously this wasn't very practical or easy to scale, so I'm dropping it
+ //
+ // const res = await fetch("https://jbilcke-hf-broadway-api.hf.space", {
+ //   method: "POST",
+ //   headers: { 'Content-Type': 'text/plain' },
+ //   body: plainText,
+ // })
+ // const blob = await res.blob()
+ // task.setProgress({
+ //   message: "Loading scenes..",
+ //   value: 50
+ // })
+ // const clap = await parseClap(blob)
+
+ // new way: we analyze the screenplay on the browser side
+ const clap = await parseScriptToClap(plainText)
  clap.meta.title = `${projectName || ""}`

  task.setProgress({

@@ -430,7 +436,7 @@
  saveKdenline: async () => {
  const { saveAnyFile } = get()
  const clap: ClapProject = useTimeline.getState().clap
- // const tracks: Tracks = useTimeline.getState().tracks
+ // const tracks: ClapTracks = useTimeline.getState().tracks

  throw new Error(`cannot run in a browser, unfortunately`)
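
The switch to @aitube/broadway means the screenplay analysis now runs entirely client-side; a usage sketch of the new path (error handling elided, helper name is illustrative):

    import { parseScriptToClap } from "@aitube/broadway"

    // sketch: convert a plain-text screenplay to a ClapProject in the browser,
    // with no round-trip to a hosted API
    async function screenplayToClap(plainText: string, projectName?: string) {
      const clap = await parseScriptToClap(plainText)
      clap.meta.title = `${projectName || ""}`
      return clap
    }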
src/controllers/resolver/useResolver.ts CHANGED
@@ -507,7 +507,18 @@ export const useResolver = create<ResolverStore>((set, get) => ({
  //
  // note that video clips are also concerned: we want them to perfectly fit
  if (newSegment.category === ClapSegmentCategory.DIALOGUE) {
- await timeline.fitSegmentToAssetDuration(newSegment)
+ // by default fitSegmentToAssetDuration() will fit the segment to the asset duration
+ // without any gap, which can be weird to hear.. so let's add a little delay
+ // (that is assuming that our dialogue lines have been properly cut)
+ await timeline.fitSegmentToAssetDuration(
+   newSegment,
+   typeof newSegment.assetDurationInMs === "number"
+     // this delay is arbitrary, could be another value (200, 500, 1200..)
+     ? newSegment.assetDurationInMs + 700
+     : 2000
+ )
  } else if (newSegment.category === ClapSegmentCategory.VIDEO) {
  await timeline.fitSegmentToAssetDuration(newSegment)
  }
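
The padding logic above reads as a small pure function: keep the asset duration plus an arbitrary breathing-room delay, or fall back to a fixed length when the duration is unknown (a sketch; the constant names are illustrative, the values are those from the diff):

    // sketch: compute the target duration for a dialogue segment
    const DIALOGUE_PADDING_IN_MS = 700   // arbitrary; could be 200, 500, 1200..
    const FALLBACK_DURATION_IN_MS = 2000

    function dialogueDurationInMs(assetDurationInMs?: number): number {
      return typeof assetDurationInMs === "number"
        ? assetDurationInMs + DIALOGUE_PADDING_IN_MS
        : FALLBACK_DURATION_IN_MS
    }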
src/controllers/settings/getDefaultSettingsState.ts CHANGED
@@ -84,18 +84,31 @@ export function getDefaultSettingsState(): SettingsState {
  comfyWorkflowForSound: "{}",
  comfyWorkflowForMusic: "{}",

+ // those are not designed for Hugging Face specifically,
+ // but to be compatible with any Gradio API URL that the
+ // user would set manually (eg. running on localhost)
+ gradioApiUrlForAssistant: "",
+ gradioApiUrlForImage: "",
+ gradioApiUrlForVideo: "",
+ gradioApiUrlForVoice: "",
+ gradioApiUrlForSound: "",
+ gradioApiUrlForMusic: "",
+
+ /******** should we deprecate all of those? or convert to defaults? ******
+ *
  // note how we prefix everything with "models"
  // that's because it will be possible at some point to also
  // call a space eg. spaces/openai/sora (this one is just a silly example, of course)
  // "models/HuggingFaceH4/zephyr-7b-beta"
  // "models/mistralai/Mixtral-8x7B-Instruct-v0.1",
+
  huggingFaceModelForAssistant: "models/mistralai/Mixtral-8x7B-Instruct-v0.1",
  huggingFaceModelForImage: "models/sd-community/sdxl-flash",
  huggingFaceModelForImageDepth: "",
  huggingFaceModelForImageSegmentation: "",
  huggingFaceModelForImageUpscaling: "",

- // huggingFaceModelForVideo: "spaces/multimodalart/hallo",
+ // huggingFaceModelForVideo: "spaces/jbilcke-hf/hallo-api",
  huggingFaceModelForVideo: "spaces/hpcai-tech/open-sora",
  huggingFaceModelForVideoDepth: "",
  huggingFaceModelForVideoSegmentation: "",

@@ -105,16 +118,6 @@
  huggingFaceModelForSound: "",
  huggingFaceModelForMusic: "",

- // those are not designed for Hugging Face specifically,
- // but to be compatible with any Gradio API URL that the
- // user would set manually (eg. running on localhost)
- gradioApiUrlForAssistant: "",
- gradioApiUrlForImage: "",
- gradioApiUrlForVideo: "",
- gradioApiUrlForVoice: "",
- gradioApiUrlForSound: "",
- gradioApiUrlForMusic: "",
-
  replicateModelForImage: "chenxwh/sdxl-flash:001bb81139b01780380407b4106ac681df46108e002eafbeb9ccb2d8faca42e1",
  replicateModelForImageDepth: "",
  replicateModelForImageSegmentation: "",

@@ -184,6 +187,8 @@
  kitsAiModelForVoice: "",
  cohereModelForAssistant: "command-r-plus",
  mistralAiModelForAssistant: "open-mixtral-8x22b"
+
+ */
  }
  return state
  }
src/controllers/settings/types.ts CHANGED
@@ -88,6 +88,16 @@ export type SettingsState = {

  // ---------- MODELS FOR EACH PROVIDER --------------

+
+ gradioApiUrlForAssistant: string
+ gradioApiUrlForImage: string
+ gradioApiUrlForVideo: string
+ gradioApiUrlForVoice: string
+ gradioApiUrlForSound: string
+ gradioApiUrlForMusic: string
+
+ /*
+ should we deprecate this? or rename to "default<something>"?
  huggingFaceModelForAssistant: string
  huggingFaceModelForImage: string
  huggingFaceModelForImageDepth: string

@@ -101,13 +111,6 @@
  huggingFaceModelForSound: string
  huggingFaceModelForMusic: string

- gradioApiUrlForAssistant: string
- gradioApiUrlForImage: string
- gradioApiUrlForVideo: string
- gradioApiUrlForVoice: string
- gradioApiUrlForSound: string
- gradioApiUrlForMusic: string
-
  replicateModelForImage: string
  replicateModelForImageDepth: string
  replicateModelForImageSegmentation: string

@@ -171,6 +174,7 @@
  cohereModelForAssistant: string

  mistralAiModelForAssistant: string
+ */
  }

  export type SettingsControls = {

@@ -251,6 +255,15 @@
  setComfyWorkflowForSound: (comfyWorkflowForSound?: string) => void
  setComfyWorkflowForMusic: (comfyWorkflowForMusic?: string) => void

+ setGradioApiUrlForAssistant: (gradioApiUrlForAssistant?: string) => void
+ setGradioApiUrlForImage: (gradioApiUrlForImage?: string) => void
+ setGradioApiUrlForVideo: (gradioApiUrlForVideo?: string) => void
+ setGradioApiUrlForVoice: (gradioApiUrlForVoice?: string) => void
+ setGradioApiUrlForSound: (gradioApiUrlForSound?: string) => void
+ setGradioApiUrlForMusic: (gradioApiUrlForMusic?: string) => void
+
+ /*
+ should we deprecate this? or rename to "default<something>"?
  setHuggingFaceModelForAssistant: (huggingFaceModelForAssistant?: string) => void
  setHuggingFaceModelForImage: (huggingFaceModelForImage?: string) => void
  setHuggingFaceModelForImageDepth: (huggingFaceModelForImageDepth?: string) => void

@@ -264,13 +277,6 @@
  setHuggingFaceModelForSound: (huggingFaceModelForSound?: string) => void
  setHuggingFaceModelForMusic: (huggingFaceModelForMusic?: string) => void

- setGradioApiUrlForAssistant: (gradioApiUrlForAssistant?: string) => void
- setGradioApiUrlForImage: (gradioApiUrlForImage?: string) => void
- setGradioApiUrlForVideo: (gradioApiUrlForVideo?: string) => void
- setGradioApiUrlForVoice: (gradioApiUrlForVoice?: string) => void
- setGradioApiUrlForSound: (gradioApiUrlForSound?: string) => void
- setGradioApiUrlForMusic: (gradioApiUrlForMusic?: string) => void
-
  setReplicateModelForImage: (replicateModelForImage?: string) => void
  setReplicateModelForImageDepth: (replicateModelForImageDepth?: string) => void
  setReplicateModelForImageSegmentation: (replicateModelForImageSegmentation?: string) => void

@@ -333,7 +339,7 @@
  setMistralAiModelForAssistant: (mistralAiModelForAssistant?: string) => void

  setKitsAiModelForVoice: (kitsAiModelForVoice?: string) => void
-
+ */
  getSettings: () => SettingsState
  }
src/controllers/settings/useSettings.ts CHANGED
@@ -281,7 +281,26 @@ export const useSettings = create<SettingsStore>()(
  setComfyWorkflowForMusic: (comfyWorkflowForMusic?: string) => {
  set({ comfyWorkflowForMusic: getValidComfyWorkflowTemplate(comfyWorkflowForMusic, getDefaultSettingsState().comfyWorkflowForMusic) })
  },
-
+ setGradioApiUrlForAssistant: (gradioApiUrlForAssistant?: string) => {
+ set({ gradioApiUrlForAssistant: getValidString(gradioApiUrlForAssistant, getDefaultSettingsState().gradioApiUrlForAssistant) })
+ },
+ setGradioApiUrlForImage: (gradioApiUrlForImage?: string) => {
+ set({ gradioApiUrlForImage: getValidString(gradioApiUrlForImage, getDefaultSettingsState().gradioApiUrlForImage) })
+ },
+ setGradioApiUrlForVideo: (gradioApiUrlForVideo?: string) => {
+ set({ gradioApiUrlForVideo: getValidString(gradioApiUrlForVideo, getDefaultSettingsState().gradioApiUrlForVideo) })
+ },
+ setGradioApiUrlForVoice: (gradioApiUrlForVoice?: string) => {
+ set({ gradioApiUrlForVoice: getValidString(gradioApiUrlForVoice, getDefaultSettingsState().gradioApiUrlForVoice) })
+ },
+ setGradioApiUrlForSound: (gradioApiUrlForSound?: string) => {
+ set({ gradioApiUrlForSound: getValidString(gradioApiUrlForSound, getDefaultSettingsState().gradioApiUrlForSound) })
+ },
+ setGradioApiUrlForMusic: (gradioApiUrlForMusic?: string) => {
+ set({ gradioApiUrlForMusic: getValidString(gradioApiUrlForMusic, getDefaultSettingsState().gradioApiUrlForMusic) })
+ },
+ /*
+ should we deprecate this? or rename to "default<something>"?
  setHuggingFaceModelForAssistant: (huggingFaceModelForAssistant?: string) => {
  set({ huggingFaceModelForAssistant: getValidString(huggingFaceModelForAssistant, getDefaultSettingsState().huggingFaceModelForAssistant) })
  },

@@ -318,24 +337,6 @@
  setHuggingFaceModelForMusic: (huggingFaceModelForMusic?: string) => {
  set({ huggingFaceModelForMusic: getValidString(huggingFaceModelForMusic, getDefaultSettingsState().huggingFaceModelForMusic) })
  },
- setGradioApiUrlForAssistant: (gradioApiUrlForAssistant?: string) => {
- set({ gradioApiUrlForAssistant: getValidString(gradioApiUrlForAssistant, getDefaultSettingsState().gradioApiUrlForAssistant) })
- },
- setGradioApiUrlForImage: (gradioApiUrlForImage?: string) => {
- set({ gradioApiUrlForImage: getValidString(gradioApiUrlForImage, getDefaultSettingsState().gradioApiUrlForImage) })
- },
- setGradioApiUrlForVideo: (gradioApiUrlForVideo?: string) => {
- set({ gradioApiUrlForVideo: getValidString(gradioApiUrlForVideo, getDefaultSettingsState().gradioApiUrlForVideo) })
- },
- setGradioApiUrlForVoice: (gradioApiUrlForVoice?: string) => {
- set({ gradioApiUrlForVoice: getValidString(gradioApiUrlForVoice, getDefaultSettingsState().gradioApiUrlForVoice) })
- },
- setGradioApiUrlForSound: (gradioApiUrlForSound?: string) => {
- set({ gradioApiUrlForSound: getValidString(gradioApiUrlForSound, getDefaultSettingsState().gradioApiUrlForSound) })
- },
- setGradioApiUrlForMusic: (gradioApiUrlForMusic?: string) => {
- set({ gradioApiUrlForMusic: getValidString(gradioApiUrlForMusic, getDefaultSettingsState().gradioApiUrlForMusic) })
- },
  setReplicateModelForImage: (replicateModelForImage?: string) => {
  set({ replicateModelForImage: getValidString(replicateModelForImage, getDefaultSettingsState().replicateModelForImage) })
  },

@@ -489,6 +490,7 @@
  setKitsAiModelForVoice: (kitsAiModelForVoice?: string) => {
  set({ kitsAiModelForVoice: getValidString(kitsAiModelForVoice, getDefaultSettingsState().kitsAiModelForVoice) })
  },
+ */
  getSettings: (): SettingsState => {
  const state = get()
  const defaultSettings = getDefaultSettingsState()

@@ -570,6 +572,15 @@
  comfyWorkflowForVoice: state.comfyWorkflowForVoice || defaultSettings.comfyWorkflowForVoice,
  comfyWorkflowForSound: state.comfyWorkflowForSound || defaultSettings.comfyWorkflowForSound,
  comfyWorkflowForMusic: state.comfyWorkflowForMusic || defaultSettings.comfyWorkflowForMusic,
+ gradioApiUrlForAssistant: state.gradioApiUrlForAssistant || defaultSettings.gradioApiUrlForAssistant,
+ gradioApiUrlForImage: state.gradioApiUrlForImage || defaultSettings.gradioApiUrlForImage,
+ gradioApiUrlForVideo: state.gradioApiUrlForVideo || defaultSettings.gradioApiUrlForVideo,
+ gradioApiUrlForVoice: state.gradioApiUrlForVoice || defaultSettings.gradioApiUrlForVoice,
+ gradioApiUrlForSound: state.gradioApiUrlForSound || defaultSettings.gradioApiUrlForSound,
+ gradioApiUrlForMusic: state.gradioApiUrlForMusic || defaultSettings.gradioApiUrlForMusic,
+
+ /*
+ should we deprecate this? or rename to "default<something>"?
  huggingFaceModelForAssistant: state.huggingFaceModelForAssistant || defaultSettings.huggingFaceModelForAssistant,
  huggingFaceModelForImage: state.huggingFaceModelForImage || defaultSettings.huggingFaceModelForImage,
  huggingFaceModelForImageDepth: state.huggingFaceModelForImageDepth || defaultSettings.huggingFaceModelForImageDepth,

@@ -582,12 +593,6 @@
  huggingFaceModelForVoice: state.huggingFaceModelForVoice || defaultSettings.huggingFaceModelForVoice,
  huggingFaceModelForSound: state.huggingFaceModelForSound || defaultSettings.huggingFaceModelForSound,
  huggingFaceModelForMusic: state.huggingFaceModelForMusic || defaultSettings.huggingFaceModelForMusic,
- gradioApiUrlForAssistant: state.gradioApiUrlForAssistant || defaultSettings.gradioApiUrlForAssistant,
- gradioApiUrlForImage: state.gradioApiUrlForImage || defaultSettings.gradioApiUrlForImage,
- gradioApiUrlForVideo: state.gradioApiUrlForVideo || defaultSettings.gradioApiUrlForVideo,
- gradioApiUrlForVoice: state.gradioApiUrlForVoice || defaultSettings.gradioApiUrlForVoice,
- gradioApiUrlForSound: state.gradioApiUrlForSound || defaultSettings.gradioApiUrlForSound,
- gradioApiUrlForMusic: state.gradioApiUrlForMusic || defaultSettings.gradioApiUrlForMusic,
  replicateModelForImage: state.replicateModelForImage || defaultSettings.replicateModelForImage,
  replicateModelForImageDepth: state.replicateModelForImageDepth || defaultSettings.replicateModelForImageDepth,
  replicateModelForImageSegmentation: state.replicateModelForImageSegmentation || defaultSettings.replicateModelForImageSegmentation,

@@ -639,6 +644,7 @@
  cohereModelForAssistant: state.cohereModelForAssistant || defaultSettings.cohereModelForAssistant,
  mistralAiModelForAssistant: state.mistralAiModelForAssistant || defaultSettings.mistralAiModelForAssistant,
  kitsAiModelForVoice: state.kitsAiModelForVoice || defaultSettings.kitsAiModelForVoice,
+ */
  }
  },
  }),

@@ -646,4 +652,8 @@
  name: 'CLAPPER_REVISION_0_CONTROLLERS_USE_SETTINGS'
  },
  ),
- )
+ )
+
+ if (typeof window !== "undefined") {
+   (window as any).useSettings = useSettings
+ }
src/lib/core/constants.ts CHANGED
@@ -4,7 +4,7 @@
  export const HARD_LIMIT_NB_MAX_ASSETS_TO_GENERATE_IN_PARALLEL = 32

  export const APP_NAME = "Clapper.app"
- export const APP_REVISION = "r20240617-0204"
+ export const APP_REVISION = "r20240623-1700"

  export const APP_DOMAIN = "Clapper.app"
  export const APP_LINK = "https://clapper.app"
src/lib/hf/adapter/adaptAnyInputsToGradioInputs.ts CHANGED
@@ -2,6 +2,7 @@ import { GradioApiInfo, SupportedFields } from "../types"
  import { identifyField } from "./identifyField"
  import { getDefaultFields } from "./getDefaultFields"
  import { findMainGradioEndpoint } from "./findMainGradioEndpoint"
+ import { base64DataUriToBlob } from "@/lib/utils/base64DataUriToBlob"

  /**
  * This function tries to adapt arbitrary inputs to strict gradio inputs

@@ -17,7 +18,8 @@ export function adaptAnyInputsToGradioInputs({
  gradioApiInfo: GradioApiInfo
  }): {
  endpoint: string
- inputs: Array<string | number | boolean | undefined | null>
+ inputArray: Array<string | number | boolean | Blob | undefined | null>
+ inputMap: Record<string, string | number | boolean | Blob | undefined | null>
  } {

  const mainGradioEndpoint = findMainGradioEndpoint({ gradioApiInfo })

@@ -34,9 +36,12 @@
  inputFields[key] = inputField
  allInputFields = {...allInputFields, ...inputField}
  }
+ console.log(`input fields passed by the parent calling function:`, inputFields)

  // the gradio input array
- const gradioInputs: any[] = []
+ // apparently the new JS client also supports dictionaries, yay!
+ let inputArray: Array<string | number | boolean | Blob | undefined | null> = []
+ let inputMap: Record<string, string | number | boolean | Blob | undefined | null> = {}

  for (const parameter of mainGradioEndpoint.endpoint.parameters) {
  let gradioInputValue: any = undefined

@@ -55,11 +60,32 @@
  if (fields.hasInputGuidance) { gradioInputValue = allInputFields.inputGuidance }
  if (fields.hasInputSeed) { gradioInputValue = allInputFields.inputSeed }

- gradioInputs.push(gradioInputValue)
+ // console.log("parameter:", parameter)
+ const valueSeemsToBeABase64Uri = typeof gradioInputValue === "string" && gradioInputValue.startsWith("data:")
+ const fieldSeemsToBeTextBased =
+   parameter.type === "string"
+   || parameter.component === "Textbox"
+   // || parameter.parameter_name.includes("base64")
+
+ // the magic doesn't end here: we need to convert base64 inputs to buffers,
+ // unless gradio expects them to be text
+ if (valueSeemsToBeABase64Uri && !fieldSeemsToBeTextBased) {
+   gradioInputValue = base64DataUriToBlob(gradioInputValue)
+ }
+ // old, low-level way
+ inputArray.push(gradioInputValue)
+
+ // new, high-level way
+ inputMap[parameter.parameter_name] = gradioInputValue
  }

+ console.log(`inputArray:`, inputArray.map(x => typeof x === "string" ? x.slice(0, 255) : x))
+
+ console.log(`await client.predict("${mainGradioEndpoint.name}", `, inputMap)
+
  return {
  endpoint: mainGradioEndpoint.name,
- inputs: gradioInputs
+ inputArray,
+ inputMap,
  }
  }
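
To make the dual return shape concrete: for a hypothetical endpoint taking (prompt, seed), the adapter would now produce both the positional and the named form (values purely illustrative):

    // sketch: the two shapes returned by adaptAnyInputsToGradioInputs
    const inputArray = ["a cat on a sofa", 42]                // old, positional
    const inputMap = { prompt: "a cat on a sofa", seed: 42 }  // new, named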
src/lib/hf/adapter/identifyField.ts CHANGED
@@ -1,9 +1,17 @@
  import { SupportedFields } from "../types"

  export function identifyField(key: string, value?: any, index?: number): Partial<SupportedFields> {
- const normalizedKey = key.toLowerCase().trim()
+ const normalizedKey =
+   key
+     .toLowerCase()
+     .replaceAll("_uri", "")
+     .replaceAll("_url", "")
+     .replaceAll("_b64", "")
+     .replaceAll("_base64", "")
+     .trim()
+ console.log(`normalizedKey: ${normalizedKey}`)
+
  switch (normalizedKey) {
-
  case "width":
  let strWidth = ""
  let numWidth = 0
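
The extended normalization means keys that differ only by a transport suffix now collapse onto the same field (example keys are illustrative):

    // sketch: all of these normalize to "image" and hit the same switch case
    // "image_url"    -> "image"
    // "image_b64"    -> "image"
    // "IMAGE_BASE64" -> "image"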
src/lib/hf/callGradioApi.ts CHANGED
@@ -48,6 +48,7 @@ export async function callGradioApi<T>({
  }
  */

+
  const gradioApiInfo = await getGradioApiInfo({
  url: ownerAndId,
  apiKey

@@ -61,17 +62,20 @@
  })

  // console.log(`gradioEndpointInputs: `, gradioEndpointInputs)

  const app = await Client.connect(ownerAndId, {
  hf_token: apiKey as any
  })
  // console.log(`app: `, app)

+ console.log(`calling Gradio API ${ownerAndId}:${gradioEndpointInputs.endpoint}`)
  const output = await app.predict(
  gradioEndpointInputs.endpoint,
- gradioEndpointInputs.inputs
+ gradioEndpointInputs.inputMap
  )
- console.log(`output: `, output)
-
- return output.data as unknown as T
- }
+ // console.log(`output: `, output)
+
+ const data1 = (Array.isArray(output.data) ? output.data[0] : "") || ""
+
+ return data1 as unknown as T
+ }
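
For context, the @gradio/client call pattern this relies on: the 1.x JS client accepts a record of named parameters as the predict payload (the Space name, token, and fields below are illustrative):

    import { Client } from "@gradio/client"

    // sketch: connect to a Space and call an endpoint with named inputs
    const app = await Client.connect("owner/space", { hf_token: "hf_..." as any })
    const output = await app.predict("/predict", { prompt: "a cat", seed: 42 })
    const first = Array.isArray(output.data) ? output.data[0] : undefined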
src/lib/hf/getSpaces.ts CHANGED
@@ -44,6 +44,7 @@ export async function getSpaces({
  })) {

  if (sdk && space.sdk != sdk) { continue }
+
  results.push(space)
  }
src/lib/utils/base64DataUriToBlob.ts ADDED
@@ -0,0 +1,15 @@
+ export function base64DataUriToBlob(dataURI: string) {
+   dataURI = dataURI.replace(/^data:/, '');
+
+   const match = dataURI.match(/(?:image|video|audio|text)\/[^;]+/)
+   const type = match?.[0] || ""
+   const base64 = dataURI.replace(/^[^,]+,/, '');
+
+   // decode the base64 payload into a binary string first;
+   // copying char codes from the raw base64 text would yield a corrupted blob
+   const binary = atob(base64);
+   const typedArray = new Uint8Array(binary.length);
+
+   for (let i = 0; i < binary.length; i++) {
+     typedArray[i] = binary.charCodeAt(i);
+   }
+
+   return new Blob([typedArray], { type });
+ }
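
A usage sketch for the new helper (the data URI is illustrative and truncated):

    // sketch: turn a PNG data URI into a Blob suitable for an upload
    const dataUri = "data:image/png;base64,iVBORw0KGgo..." // truncated
    const blob = base64DataUriToBlob(dataUri)
    console.log(blob.type) // "image/png"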
tailwind.config.js CHANGED
@@ -17,6 +17,10 @@ module.exports = {
  },
  },
  extend: {
+ aspectRatio: {
+   '1024/576': '1024 / 576',
+ },
+
  fontFamily: {
  salsa: ['var(--font-salsa)'],
  clock: ["var(--font-clock)"],
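
With that extension the ratio becomes available as a utility class; if I read the config right, it would be used like this (the element is an illustrative JSX sketch):

    // sketch: a 1024x576 media placeholder using the new aspect-ratio utility
    <div className="aspect-1024/576 w-full bg-black" />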
tsconfig.json CHANGED
@@ -1,11 +1,9 @@
  {
  "compilerOptions": {
- "target": "ES2022",
  "lib": ["dom", "dom.iterable", "esnext"],
  "allowJs": true,
  "skipLibCheck": true,
  "strict": true,
- "forceConsistentCasingInFileNames": true,
  "noEmit": true,
  "esModuleInterop": true,
  "module": "esnext",