Commit 52d39bb (parent: 2dd34f0) committed by jbilcke-hf (HF staff)

use latest version of clap

.nvmrc CHANGED
@@ -1 +1 @@
- v20.15.1
+ v20.17.0

package-lock.json CHANGED
The diff for this file is too large to render. See raw diff
 
package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@aitube/website",
-   "version": "0.2.0",
+   "version": "0.2.4",
    "private": true,
    "scripts": {
      "patch": "cp -f patch.js node_modules/fluent-ffmpeg/index.js",
@@ -10,9 +10,9 @@
      "lint": "next lint"
    },
    "dependencies": {
-     "@aitube/clap": "0.2.3",
-     "@aitube/client": "0.2.3",
-     "@aitube/engine": "0.2.3",
+     "@aitube/clap": "0.2.4",
+     "@aitube/client": "0.2.4-3",
+     "@aitube/engine": "0.2.4",
      "@huggingface/hub": "0.15.1",
      "@huggingface/inference": "^2.7.0",
      "@jcoreio/async-throttle": "^1.6.0",
src/app/api/actions/ai-tube-hf/downloadClapProject.ts CHANGED
@@ -44,7 +44,8 @@ export async function downloadClapProject({
    label: clapProject.meta.title || "Untitled",
    description: clapProject.meta.description || "",
    prompt: "", // there is no prompt - instead we use segments
-   model: parseVideoModelName(clapProject.meta.defaultVideoModel, channel.model),
+   // model: parseVideoModelName(clapProject.meta.defaultVideoModel, channel.model),
+   model: parseVideoModelName(channel.model, channel.model),
    style: channel.style,
    lora: channel.lora,
    voice: channel.voice,
@@ -57,7 +58,7 @@ export async function downloadClapProject({
    duration: 0, // will be computed automatically
    ...computeOrientationProjectionWidthHeight({
      lora: "",
-     orientation: clapProject.meta.orientation,
+     orientation: clapProject.meta.imageRatio,
      // projection, // <- will be extrapolated from the LoRA for now
    }),
  }
src/app/api/actions/ai-tube-hf/parseChannel.ts CHANGED
@@ -1,6 +1,6 @@
  "use server"

- import { ClapMediaOrientation, defaultMediaOrientation } from "@aitube/clap"
+ import { ClapImageRatio, defaultImageRatio } from "@aitube/clap"
  import { Credentials, downloadFile, whoAmI } from "@/lib/huggingface/hub/src"
  import { parseDatasetReadme } from "@/app/api/parsers/parseDatasetReadme"
  import { ChannelInfo, VideoGenerationModel } from "@/types/general"
@@ -79,7 +79,7 @@ export async function parseChannel(options: {
  let voice = ""
  let music = ""
  let tags: string[] = []
- let orientation: ClapMediaOrientation = defaultMediaOrientation
+ let orientation: ClapImageRatio = defaultImageRatio

  // console.log(`going to read datasets/${name}`)
  try {
@@ -103,7 +103,7 @@ export async function parseChannel(options: {
  style = parsedDatasetReadme.style || ""
  voice = parsedDatasetReadme.voice || ""
  music = parsedDatasetReadme.music || ""
- orientation = parsedDatasetReadme.orientation || defaultMediaOrientation
+ orientation = parsedDatasetReadme.orientation || defaultImageRatio

  thumbnail =
    thumbnail.startsWith("http")
src/app/api/actions/ai-tube-hf/uploadVideoRequestToDataset.ts CHANGED
@@ -2,7 +2,7 @@

  import { Blob } from "buffer"

- import { ClapMediaOrientation } from "@aitube/clap"
+ import { ClapImageRatio } from "@aitube/clap"

  import { Credentials, uploadFile, whoAmI } from "@/lib/huggingface/hub/src"
  import { ChannelInfo, VideoGenerationModel, MediaInfo, VideoRequest } from "@/types/general"
@@ -41,7 +41,7 @@ export async function uploadVideoRequestToDataset({
    music: string
    tags: string[]
    duration: number
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
  }): Promise<{
    videoRequest: VideoRequest
    videoInfo: MediaInfo
src/app/api/actions/submitVideoRequest.ts CHANGED
@@ -1,6 +1,6 @@
  "use server"

- import { ClapMediaOrientation } from "@aitube/clap"
+ import { ClapImageRatio } from "@aitube/clap"

  import { ChannelInfo, VideoGenerationModel, MediaInfo } from "@/types/general"

@@ -33,7 +33,7 @@ export async function submitVideoRequest({
    music: string
    tags: string[]
    duration: number
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
  }): Promise<MediaInfo> {
    if (!apiKey) {
      throw new Error(`the apiKey is required`)
src/app/api/generators/clap/addLatentScenesToClap.ts CHANGED
@@ -1,6 +1,6 @@
  "use server"

- import { ClapProject, newSegment } from "@aitube/clap"
+ import { ClapOutputType, ClapProject, ClapSegmentCategory, newSegment } from "@aitube/clap"

  import { LatentScenes } from "./types"
  import { defaultSegmentDurationInMs } from "./constants"
@@ -33,10 +33,10 @@ export async function addLatentScenesToClap({
    track: 0,
    startTimeInMs,
    endTimeInMs,
-   category: "interface",
+   category: ClapSegmentCategory.INTERFACE,
    prompt: "<BUILTIN:DISCLAIMER>",
    label: "fish",
-   outputType: "interface",
+   outputType: ClapOutputType.INTERFACE
  }))

  for (const { characters, locations, actions } of scenes) {
@@ -50,10 +50,10 @@ export async function addLatentScenesToClap({
    track: track++,
    startTimeInMs,
    endTimeInMs,
-   category: "characters",
+   category: ClapSegmentCategory.CHARACTER,
    prompt: character,
    label: character,
-   outputType: "text",
+   outputType: ClapOutputType.TEXT,
  }))
  }

@@ -62,10 +62,10 @@ export async function addLatentScenesToClap({
    track: track++,
    startTimeInMs,
    endTimeInMs,
-   category: "location",
+   category: ClapSegmentCategory.LOCATION,
    prompt: location,
    label: location,
-   outputType: "text",
+   outputType: ClapOutputType.TEXT,
  }))
  }

@@ -74,10 +74,10 @@ export async function addLatentScenesToClap({
    track: track++,
    startTimeInMs,
    endTimeInMs,
-   category: "action",
+   category: ClapSegmentCategory.ACTION,
    prompt: action,
    label: action,
-   outputType: "text",
+   outputType: ClapOutputType.TEXT,
  }))
  }

@@ -85,10 +85,10 @@ export async function addLatentScenesToClap({
    track: track++,
    startTimeInMs,
    endTimeInMs,
-   category: "video",
+   category: ClapSegmentCategory.VIDEO,
    prompt: "video",
    label: "video",
-   outputType: "video",
+   outputType: ClapOutputType.VIDEO,
  }))
  }
src/app/api/generators/clap/generateClap.ts CHANGED
@@ -4,7 +4,7 @@
  import { LatentScenes } from "./types"
  import { addLatentScenesToClap } from "./addLatentScenesToClap"
  import { getLatentScenes } from "./getLatentScenes"
- import { ClapProject, getEmptyClap, newClap, serializeClap } from "@aitube/clap"
+ import { ClapImageRatio, ClapProject, getEmptyClap, newClap, serializeClap } from "@aitube/clap"

  /**
   * Generate a Clap file from scratch using a prompt
@@ -31,14 +31,16 @@ export async function generateClap({
    title: "Latent content", // TODO "
    description: "",
    licence: "non commercial",
-   orientation: "landscape",
+   imageRatio: ClapImageRatio.LANDSCAPE,
    width: 1024,
    height: 576,
-   defaultVideoModel: "SDXL",
-   extraPositivePrompt: [],
-   screenplay: "",
+   imagePrompt: "",
+   storyPrompt: "",
+   systemPrompt: "",
    isLoop: true,
    isInteractive: true,
+   bpm: 120,
+   frameRate: 24,
  }
  })
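
The 0.2.4 meta shape drops defaultVideoModel, extraPositivePrompt, and screenplay in favour of imagePrompt / storyPrompt / systemPrompt, and adds bpm and frameRate. A standalone sketch of the new call, using only fields that appear in this commit and assuming the nesting the hunks suggest (newClap({ meta: { ... } })); other optional meta fields may exist:

import { ClapImageRatio, newClap } from "@aitube/clap"

const clap = newClap({
  meta: {
    title: "Latent content",
    description: "",
    licence: "non commercial",
    imageRatio: ClapImageRatio.LANDSCAPE, // was: orientation: "landscape"
    width: 1024,
    height: 576,
    imagePrompt: "",  // these three replace defaultVideoModel /
    storyPrompt: "",  // extraPositivePrompt / screenplay
    systemPrompt: "",
    isLoop: true,
    isInteractive: true,
    bpm: 120,         // new in 0.2.4
    frameRate: 24,    // new in 0.2.4
  }
})
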
 
src/app/api/generators/search/defaultChannel.ts CHANGED
@@ -1,5 +1,5 @@
  import { ChannelInfo } from "@/types/general"
- import { defaultMediaOrientation } from "@aitube/clap"
+ import { defaultImageRatio } from "@aitube/clap"

  export const defaultChannel: ChannelInfo = {
    /**
@@ -67,5 +67,5 @@ export const defaultChannel: ChannelInfo = {
    /**
     * Default video orientation
     */
-   orientation: defaultMediaOrientation
+   orientation: defaultImageRatio
  }
src/app/api/generators/search/getNewMediaInfo.ts CHANGED
@@ -5,7 +5,7 @@ import {
    MediaInfo,
  } from "@/types/general"
  import { defaultChannel } from "./defaultChannel"
- import { defaultMediaOrientation } from "@aitube/clap"
+ import { defaultImageRatio } from "@aitube/clap"

  export function getNewMediaInfo(params: Partial<MediaInfo> = {}): MediaInfo {

@@ -133,7 +133,7 @@ export function getNewMediaInfo(params: Partial<MediaInfo> = {}): MediaInfo {
    /**
     * General media aspect ratio
     */
-   orientation: defaultMediaOrientation,
+   orientation: defaultImageRatio,

    /**
     * Media projection (cartesian by default)
src/app/api/generators/search/searchResultToMediaInfo.ts CHANGED
@@ -8,7 +8,7 @@ import {
  import { newRender } from "../../providers/videochain/renderWithVideoChain"

  import { LatentSearchResult } from "./types"
- import { defaultMediaOrientation } from "@aitube/clap"
+ import { defaultImageRatio } from "@aitube/clap"

  const channel: ChannelInfo = {
    /**
@@ -76,7 +76,7 @@ const channel: ChannelInfo = {
    /**
     * Default video orientation
     */
-   orientation: defaultMediaOrientation
+   orientation: defaultImageRatio
  }

  export async function searchResultToMediaInfo(searchResult: LatentSearchResult): Promise<MediaInfo> {
@@ -218,7 +218,7 @@ export async function searchResultToMediaInfo(searchResult: LatentSearchResult):
    /**
     * General media aspect ratio
     */
-   orientation: defaultMediaOrientation,
+   orientation: defaultImageRatio,

    /**
     * Media projection (cartesian by default)
src/app/api/parsers/parseDatasetPrompt.ts CHANGED
@@ -1,4 +1,4 @@
- import { parseMediaOrientation, defaultMediaOrientation } from "@aitube/clap"
+ import { parseImageRatio, defaultImageRatio } from "@aitube/clap"

  import { ChannelInfo, ParsedDatasetPrompt } from "@/types/general"
  import { parseVideoModelName } from "./parseVideoModelName"
@@ -36,7 +36,7 @@ export function parseDatasetPrompt(markdown: string, channel: ChannelInfo): Pars
    thumbnail: typeof thumbnail === "string" && thumbnail ? thumbnail : "",
    voice: typeof voice === "string" && voice ? voice : (channel.voice || ""),
    music: typeof music === "string" && music ? music : (channel.music || ""),
-   orientation: parseMediaOrientation(orientation, channel.orientation),
+   orientation: parseImageRatio(orientation, channel.orientation),
  }
  } catch (err) {
  return {
@@ -50,7 +50,7 @@ export function parseDatasetPrompt(markdown: string, channel: ChannelInfo): Pars
    thumbnail: "",
    voice: channel.voice || "",
    music: channel.music || "",
-   orientation: channel.orientation || defaultMediaOrientation,
+   orientation: channel.orientation || defaultImageRatio,
  }
  }
  }
src/app/api/parsers/parseDatasetReadme.ts CHANGED
@@ -1,6 +1,6 @@

  import metadataParser from "markdown-yaml-metadata-parser"
- import { defaultMediaOrientation, parseMediaOrientation } from "@aitube/clap"
+ import { defaultImageRatio, parseImageRatio } from "@aitube/clap"

  import { ParsedDatasetReadme, ParsedMetadataAndContent } from "@/types/general"
  import { defaultVideoModel } from "@/app/config"
@@ -31,7 +31,7 @@ export function parseDatasetReadme(markdown: string = ""): ParsedDatasetReadme {
    music,
    description,
    prompt,
-   orientation: parseMediaOrientation(orientation, defaultMediaOrientation),
+   orientation: parseImageRatio(orientation, defaultImageRatio),
  }
  } catch (err) {
  return {
@@ -47,7 +47,7 @@ export function parseDatasetReadme(markdown: string = ""): ParsedDatasetReadme {
    music: "",
    description: "",
    prompt: "",
-   orientation: defaultMediaOrientation,
+   orientation: defaultImageRatio,
  }
  }
  }
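
Both parsers use the same two-argument fallback form: parseImageRatio appears to coerce a free-form string from the dataset's YAML front matter into a ClapImageRatio, returning the second argument when the value is missing or unrecognized. A sketch of the assumed semantics:

import { defaultImageRatio, parseImageRatio } from "@aitube/clap"

// assumed behavior, based on how the parsers above call it:
const explicit = parseImageRatio("portrait", defaultImageRatio) // -> ClapImageRatio.PORTRAIT (assumed)
const missing = parseImageRatio(undefined, defaultImageRatio)   // -> defaultImageRatio
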
src/app/api/utils/computeOrientationProjectionWidthHeight.ts CHANGED
@@ -1,7 +1,7 @@
  import { MediaProjection } from "@/types/general"

  import { parseProjectionFromLoRA } from "../parsers/parseProjectionFromLoRA"
- import { ClapMediaOrientation, parseMediaOrientation } from "@aitube/clap"
+ import { ClapImageRatio, parseImageRatio } from "@aitube/clap"

  export function computeOrientationProjectionWidthHeight({
    lora: maybeLora,
@@ -12,23 +12,23 @@ export function computeOrientationProjectionWidthHeight({
    projection?: any
    orientation?: any
  }): {
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
    projection: MediaProjection
    width: number
    height: number
  } {

    const lora = `${maybeLora || ""}`
-   const orientation = parseMediaOrientation(maybeOrientation)
+   const imageRatio = parseImageRatio(maybeOrientation)
    const projection = maybeProjection ? maybeProjection : parseProjectionFromLoRA(lora)

    let width = 1024
    let height = 576

-   if (orientation === "portrait") {
+   if (imageRatio === ClapImageRatio.PORTRAIT) {
      height = 1024
      width = 576
-   } else if (orientation === "square") {
+   } else if (imageRatio === ClapImageRatio.SQUARE) {
      height = 512
      width = 512
    } else {
@@ -43,7 +43,7 @@ export function computeOrientationProjectionWidthHeight({
    }

    return {
-     orientation,
+     orientation: imageRatio,
      projection,
      width,
      height,
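
The helper keeps orientation as its public return field, so callers such as downloadClapProject.ts are untouched by the internal rename to imageRatio. Expected behavior, assuming parseImageRatio maps the "portrait" / "square" strings the old code compared against onto the matching enum members:

import { computeOrientationProjectionWidthHeight } from "@/app/api/utils/computeOrientationProjectionWidthHeight"

const { orientation, projection, width, height } = computeOrientationProjectionWidthHeight({
  lora: "",
  orientation: "portrait",
})
// orientation === ClapImageRatio.PORTRAIT, width === 576, height === 1024;
// "square" yields 512x512, anything else falls through to the 1024x576 landscape default.
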
src/app/api/v1/create/index.ts CHANGED
@@ -1,6 +1,6 @@
  "use server"

- import { ClapProject, getValidNumber, newClap, newSegment, ClapSegmentCategory, ClapOutputType, ClapMediaOrientation, ClapSegmentStatus } from "@aitube/clap"
+ import { ClapProject, getValidNumber, newClap, newSegment, ClapSegmentCategory, ClapOutputType, ClapImageRatio, ClapSegmentStatus } from "@aitube/clap"

  import { sleep } from "@/lib/utils/sleep"
  import { predict } from "@/app/api/providers/huggingface/predictWithHuggingFace"
@@ -110,16 +110,20 @@ Output: `
    description: prompt,
    synopsis: "",
    licence: "",
-   orientation:
-     width > height ? ClapMediaOrientation.LANDSCAPE :
-     height > width ? ClapMediaOrientation.PORTRAIT :
-     ClapMediaOrientation.SQUARE,
+   imageRatio:
+     width > height ? ClapImageRatio.LANDSCAPE :
+     height > width ? ClapImageRatio.PORTRAIT :
+     ClapImageRatio.SQUARE,
+   storyPrompt: prompt,
+   imagePrompt: "",
+   systemPrompt: "",
    width,
    height,
    isInteractive: false,
    isLoop: false,
    durationInMs: shots.length * defaultSegmentDurationInMs,
-   defaultVideoModel: "AnimateDiff-Lightning",
+   bpm: 120,
+   frameRate: 24,
  }
  })
@@ -154,7 +158,7 @@ Output: `
    startTimeInMs: currentElapsedTimeInMs,
    endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
    assetDurationInMs: defaultSegmentDurationInMs,
-   category: ClapSegmentCategory.STORYBOARD,
+   category: ClapSegmentCategory.IMAGE,
    prompt: image,
    outputType: ClapOutputType.IMAGE,
    status: ClapSegmentStatus.TO_GENERATE,
src/app/api/v1/edit/entities/clapToLatentStory.ts CHANGED
@@ -20,7 +20,7 @@ export async function clapToLatentStory(clap: ClapProject): Promise<LatentStory[
    ClapSegmentFilteringMode.START,
    shot,
    clap.segments,
-   ClapSegmentCategory.STORYBOARD
+   ClapSegmentCategory.IMAGE
  ).at(0)

  // note: the comment might be missing, that's on purpose
src/app/api/v1/edit/story/extendClapStory.ts CHANGED
@@ -105,7 +105,7 @@ export async function extendClapStory({
    startTimeInMs: currentElapsedTimeInMs,
    endTimeInMs: currentElapsedTimeInMs + defaultSegmentDurationInMs,
    assetDurationInMs: defaultSegmentDurationInMs,
-   category: ClapSegmentCategory.STORYBOARD,
+   category: ClapSegmentCategory.IMAGE,
    prompt: image,
    outputType: ClapOutputType.IMAGE,
    status: ClapSegmentStatus.TO_GENERATE,
src/app/api/v1/edit/storyboards/processShot.ts CHANGED
@@ -37,19 +37,19 @@ export async function processShot({
  )

  const shotStoryboardSegments: ClapSegment[] = shotSegments.filter(s =>
-   s.category === ClapSegmentCategory.STORYBOARD
+   s.category === ClapSegmentCategory.IMAGE
  )

  let shotStoryboardSegment: ClapSegment | undefined = shotStoryboardSegments.at(0)

- // TASK 1: GENERATE MISSING STORYBOARD SEGMENT
+ // TASK 1: GENERATE MISSING IMAGE SEGMENT
  if (!shotStoryboardSegment) {
    shotStoryboardSegment = newSegment({
      track: 1,
      startTimeInMs: shotSegment.startTimeInMs,
      endTimeInMs: shotSegment.endTimeInMs,
      assetDurationInMs: shotSegment.assetDurationInMs,
-     category: ClapSegmentCategory.STORYBOARD,
+     category: ClapSegmentCategory.IMAGE,
      prompt: "",
      assetUrl: "",
      outputType: ClapOutputType.IMAGE,
@@ -65,7 +65,7 @@ export async function processShot({

  if (!shotStoryboardSegment) { throw new Error(`failed to generate a newSegment`) }

- // TASK 2: GENERATE MISSING STORYBOARD PROMPT
+ // TASK 2: GENERATE MISSING IMAGE PROMPT
  if (!shotStoryboardSegment?.prompt) {
    // storyboard is missing, let's generate it
    shotStoryboardSegment.prompt = getVideoPrompt(
@@ -76,7 +76,7 @@ export async function processShot({
    // console.log(`[api/v1/edit/storyboards] processShot: generating storyboard prompt: ${shotStoryboardSegment.prompt}`)
  }

- // TASK 3: GENERATE MISSING STORYBOARD BITMAP
+ // TASK 3: GENERATE MISSING IMAGE BITMAP
  if (!shotStoryboardSegment.assetUrl) {
    // console.log(`[api/v1/edit/storyboards] generating image..`)
src/app/api/v1/edit/videos/processShot.ts CHANGED
@@ -9,7 +9,7 @@ import {
    ClapOutputType,
    ClapSegmentCategory,
    ClapSegmentStatus,
-   parseMediaOrientation
+   parseImageRatio
  } from "@aitube/clap"
  import { ClapCompletionMode } from "@aitube/clap"
  import { getVideoPrompt } from "@aitube/engine"
@@ -46,7 +46,7 @@ export async function processShot({
  let shotVideoSegment: ClapSegment | undefined = shotVideoSegments.at(0)

  const shotStoryboardSegments: ClapSegment[] = shotSegments.filter(s =>
-   s.category === ClapSegmentCategory.STORYBOARD
+   s.category === ClapSegmentCategory.IMAGE
  )

  let shotStoryboardSegment: ClapSegment | undefined = shotStoryboardSegments.at(0)
@@ -175,7 +175,7 @@ export async function processShot({
    startTimeInMs: shotSegment.startTimeInMs,
    endTimeInMs: shotSegment.endTimeInMs,
    assetDurationInMs: shotSegment.assetDurationInMs,
-   category: ClapSegmentCategory.STORYBOARD,
+   category: ClapSegmentCategory.IMAGE,
    prompt: shotVideoSegment.prompt,
    outputType: ClapOutputType.IMAGE,
    status: ClapSegmentStatus.TO_GENERATE,
src/app/views/user-channel-view/index.tsx CHANGED
@@ -2,7 +2,7 @@

  import { useEffect, useState, useTransition } from "react"

- import { defaultMediaOrientation, parseMediaOrientation } from "@aitube/clap"
+ import { defaultImageRatio, parseImageRatio } from "@aitube/clap"
  import { useLocalStorage } from "usehooks-ts"

  import { useStore } from "@/app/state/useStore"
@@ -37,7 +37,7 @@ export function UserChannelView() {
  const [voice, setVoice] = useState(defaultVoice)
  const [music, setMusic] = useState("")
  const [duration, setDuration] = useState(0)
- const [orientation, setOrientation] = useState(defaultMediaOrientation)
+ const [orientation, setOrientation] = useState(defaultImageRatio)

  // we do not include the tags in the list of required fields
  const missingFields = !title || !description || !prompt
@@ -243,9 +243,9 @@ export function UserChannelView() {
  <div className="flex flex-col space-y-2 flex-grow">
    <Select
      onValueChange={(value: string) => {
-       setOrientation(parseMediaOrientation(value, defaultMediaOrientation))
+       setOrientation(parseImageRatio(value, defaultImageRatio))
      }}
-     defaultValue={defaultMediaOrientation}>
+     defaultValue={defaultImageRatio}>
      <SelectTrigger className="">
        <SelectValue placeholder="Video orientation" />
      </SelectTrigger>
src/components/interface/latent-engine/core/generateClapFromPrompt.ts CHANGED
@@ -21,8 +21,12 @@ export function generateClapFromPrompt({
    title: "Interactive Demo",
    isInteractive: true,
    isLoop: true,
-   description: story,
-   synopsis: story,
+   storyPrompt: story.join('. '),
+   imagePrompt: "",
+   systemPrompt: "",
+   synopsis: story.join('. '),
+   bpm: 120,
+   frameRate: 24,
  }
  })
@@ -100,7 +104,7 @@ export function generateClapFromPrompt({
    track: 0,
    startTimeInMs,
    endTimeInMs,
-   category: ClapSegmentCategory.STORYBOARD,
+   category: ClapSegmentCategory.IMAGE,
    prompt: "",
    label: "movie screencap",
    outputType: ClapOutputType.IMAGE,
src/components/interface/latent-engine/resolvers/resolveSegment.ts CHANGED
@@ -14,7 +14,7 @@ export async function resolveSegment(segment: ClapSegment, clap: ClapProject): P
    latentComponentResolver = interfaceResolver
  } else if (segment.category === ClapSegmentCategory.VIDEO) {
    latentComponentResolver = videoResolver
- } else if (segment.category === ClapSegmentCategory.STORYBOARD) {
+ } else if (segment.category === ClapSegmentCategory.IMAGE) {
    latentComponentResolver = imageResolver
  }
src/lib/huggingface/hub/src/utils/FileBlob.ts CHANGED
@@ -78,7 +78,7 @@ export class FileBlob extends Blob {
   * Read the part of the file delimited by the FileBlob and returns it as an ArrayBuffer.
   */
  override async arrayBuffer(): Promise<ArrayBuffer> {
-   const slice = await this.execute((file) => file.read(Buffer.alloc(this.size), 0, this.size, this.start));
+   const slice = await this.execute((file) => file.read(Buffer.alloc(this.size) as any, 0, this.size, this.start));

    return slice.buffer;
  }
@@ -87,7 +87,7 @@ export class FileBlob extends Blob {
   * Read the part of the file delimited by the FileBlob and returns it as a string.
   */
  override async text(): Promise<string> {
-   const buffer = (await this.arrayBuffer()) as Buffer;
+   const buffer = (await this.arrayBuffer()) as unknown as Buffer;

    return buffer.toString("utf8");
  }
@@ -96,7 +96,7 @@ export class FileBlob extends Blob {
   * Returns a stream around the part of the file delimited by the FileBlob.
   */
  override stream(): ReturnType<Blob["stream"]> {
-   return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType<
+   return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as unknown as ReturnType<
      Blob["stream"]
    >;
  }
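
These casts change typings only, not runtime behavior: with the stricter @types/node pulled in alongside the Node 20.17 bump, Node's Buffer and the ReadableStream from node:stream/web no longer unify with the DOM declarations that Blob's interface uses, so the previously-valid direct casts stop compiling. The usual workaround is a double cast through unknown, as in this sketch (the file path is hypothetical):

import { createReadStream } from "node:fs"
import { Readable } from "node:stream"

// Readable.toWeb returns node:stream/web's ReadableStream, which TypeScript
// considers unrelated to lib.dom's ReadableStream, so a direct cast is rejected;
// going through `unknown` bridges the two declarations at zero runtime cost.
const webStream = Readable.toWeb(
  createReadStream("/tmp/example.bin") // hypothetical path
) as unknown as ReadableStream
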
src/lib/huggingface/hub/src/utils/sha256-node.ts CHANGED
@@ -12,7 +12,7 @@ export async function* sha256Node(
    const size = buffer instanceof Blob ? buffer.size : buffer.byteLength;
    let done = 0;
    const readable =
-     buffer instanceof Blob ? Readable.fromWeb(buffer.stream() as ReadableStream) : Readable.from(Buffer.from(buffer));
+     buffer instanceof Blob ? Readable.fromWeb(buffer.stream() as unknown as ReadableStream) : Readable.from(Buffer.from(buffer));

    for await (const buffer of readable) {
      sha256Stream.update(buffer);
src/types/general.ts CHANGED
@@ -1,4 +1,4 @@
- import type { ClapMediaOrientation } from "@aitube/clap"
+ import type { ClapImageRatio } from "@aitube/clap"

  export type ProjectionMode = 'cartesian' | 'spherical'

@@ -246,7 +246,7 @@ export type ChannelInfo = {
    /**
     * Default video orientation
     */
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
  }

  export type VideoStatus =
@@ -339,7 +339,7 @@ export type VideoRequest = {
    /**
     * Video orientation
     */
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio

    /**
     * Video duration
@@ -487,7 +487,7 @@ export type MediaInfo = {
    /**
     * General media aspect ratio
     */
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio

    /**
     * Media projection (cartesian by default)
@@ -660,7 +660,7 @@ export type ParsedDatasetReadme = {
    hf_tags: string[]
    description: string
    prompt: string
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
  }

  export type ParsedMetadataAndContent = {
@@ -683,7 +683,7 @@ export type ParsedDatasetPrompt = {
    thumbnail: string
    voice: string
    music: string
-   orientation: ClapMediaOrientation
+   orientation: ClapImageRatio
  }

  export type UpdateQueueRequest = {