jbilcke-hf HF staff committed on
Commit
e781b23
•
1 Parent(s): b93a813
public/markdown-to-html.js ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Minimal, regular-expression based Markdown-to-HTML converter.
 *
 * Exposes a single function, `parse(markdown)`. It is attached to
 * `module.exports` when a CommonJS `module` is available, and to a global
 * `MarkdownToHtml` object otherwise.
 *
 * NOTE(security): the input is neither HTML-escaped nor sanitized. Any markup
 * present in the Markdown source is passed through to the output, so callers
 * that assign the result to `innerHTML` must only feed it trusted or
 * pre-escaped text.
 */
;(function() { "use strict";

var
  BLOCK = "block",
  INLINE = "inline",

  /**
   * Ordered list of parse rule descriptors. Each rule has three keys:
   * `pattern` (the RegExp to match), `replace` (a template in which `$n` is
   * substituted with capture group n and `$Ln` with the length of capture
   * group n) and `type` (BLOCK or INLINE). Rules are applied in array order,
   * and later rules see the output of earlier ones.
   * @type {Array}
   */
  parseMap = [
    {
      // <h1>..<h6>
      // A line *starting* with 1-6 hashes (up to three leading spaces are
      // tolerated). Anchoring to the line start keeps hashes inside running
      // text, URL fragments and HTML entities such as `&#39;` untouched.
      pattern: /^[ \t]{0,3}(#{1,6})[ \t]*([^\n]+)/gm,
      replace: "<h$L1>$2</h$L1>",
      type: BLOCK
    },
    {
      // <p>
      // Any line preceded by a newline that doesn't start with an HTML tag,
      // a list bullet (a single asterisk, so `**bold**` still counts as
      // text), a number followed by a dot, a blockquote marker or a
      // horizontal rule.
      pattern: /\n(?!<\/?\w+>|\s?\*(?!\*)|\s?[0-9]+\.|>|&gt;|-{5,})([^\n]+)/g,
      replace: "<p>$1</p>",
      type: BLOCK
    },
    {
      // <blockquote>
      // A greater-than character (raw or HTML-escaped) preceding any
      // characters.
      pattern: /\n(?:&gt;|\>)\W*(.*)/g,
      replace: "<blockquote><p>$1</p></blockquote>",
      type: BLOCK
    },
    {
      // <ul>
      // A line starting with a single asterisk. Each item is emitted as its
      // own list; clean() merges adjacent lists afterwards.
      pattern: /\n\s?\*(?!\*)\s*(.*)/g,
      replace: "<ul>\n\t<li>$1</li>\n</ul>",
      type: BLOCK
    },
    {
      // <ol>
      // A line starting with a number followed by a dot. Merged by clean()
      // in the same way as <ul>.
      pattern: /\n\s?[0-9]+\.\s*(.*)/g,
      replace: "<ol>\n\t<li>$1</li>\n</ol>",
      type: BLOCK
    },
    {
      // <strong>
      // Either two asterisks or two underscores, followed by any
      // characters, followed by the same two starting characters.
      pattern: /(\*\*|__)(.*?)\1/g,
      replace: "<strong>$2</strong>",
      type: INLINE
    },
    {
      // <em>
      // Either one asterisk or one underscore, followed by any
      // characters, followed by the starting character.
      pattern: /(\*|_)(.*?)\1/g,
      replace: "<em>$2</em>",
      type: INLINE
    },
    {
      // <a>
      // Not starting with an exclamation mark, square brackets
      // surrounding any characters, followed by parenthesis surrounding
      // any characters.
      pattern: /([^!])\[([^\[]+)\]\(([^\)]+)\)/g,
      replace: "$1<a href=\"$3\">$2</a>",
      type: INLINE
    },
    {
      // <img>
      // Starting with an exclamation mark, then followed by square
      // brackets surrounding any characters, followed by parenthesis
      // surrounding any characters.
      pattern: /!\[([^\[]+)\]\(([^\)]+)\)/g,
      replace: "<img src=\"$2\" alt=\"$1\" />",
      type: INLINE
    },
    {
      // <del>
      // Double tilde characters surrounding any characters.
      pattern: /\~\~(.*?)\~\~/g,
      replace: "<del>$1</del>",
      type: INLINE
    },
    {
      // <code>
      // Backticks surrounding any characters on one line.
      pattern: /`(.*?)`/g,
      replace: "<code>$1</code>",
      type: INLINE
    },
    {
      // <hr>
      // A line made only of five or more dashes.
      pattern: /\n-{5,}\n/g,
      replace: "<hr />",
      type: BLOCK
    }
  ];

exportApi();

/**
 * Publishes `parse` for external use: on `module.exports` under CommonJS,
 * otherwise on a fresh global `MarkdownToHtml` object (`window` in browsers,
 * `self` in workers, `globalThis` elsewhere). Does nothing when no global
 * object can be found instead of throwing.
 */
function exportApi() {
  var target, root;

  if (typeof module !== "undefined"
  && typeof module.exports !== "undefined") {
    target = module.exports;
  }
  else {
    root = typeof window !== "undefined" ? window
      : typeof self !== "undefined" ? self
      : typeof globalThis !== "undefined" ? globalThis
      : null;

    if (root === null) {
      return;
    }

    root.MarkdownToHtml = {};
    target = root.MarkdownToHtml;
  }

  target.parse = parse;
}

/**
 * Parses a provided Markdown string into HTML.
 *
 * @param {string} string Markdown input for transformation; `null` and
 * `undefined` are treated as an empty document.
 * @return {string} Transformed HTML output
 */
function parse(string) {
  var source = string == null ? "" : String(string);

  // Normalize line endings, then pad with newlines so that the rules
  // anchored on "\n" also match the first and last line.
  var html = "\n" + source.replace(/\r\n?/g, "\n") + "\n";

  parseMap.forEach(function(rule) {
    html = html.replace(rule.pattern, function() {
      return replace(arguments, rule.replace, rule.type);
    });
  });

  // Merge adjacent lists/blockquotes, strip outer whitespace, then condense
  // runs of newlines down to a single newline.
  return clean(html).trim().replace(/\n+/g, "\n");
}

/**
 * Expands a replacement template for one regular expression match.
 *
 * @param {Object} matchList Arguments received by a String.prototype.replace
 * callback: the full match, then the capture groups, then the numeric
 * offset and the input string.
 * @param {string} replacement Template containing `$n` / `$Ln` tokens
 * @param {string} type BLOCK or INLINE
 * @return {string} The expanded template; BLOCK output is trimmed and
 * terminated with a newline.
 */
function replace(matchList, replacement, type) {
  var groupCount = matchList.length - 1, i, expanded;

  // Capture groups are strings (or undefined); the first number is the match
  // offset. Everything from there on is not a group and must never be
  // addressable from the template or from matched text.
  for(i = 1; i < matchList.length; i++) {
    if(typeof matchList[i] === "number") {
      groupCount = i - 1;
      break;
    }
  }

  // Single pass over the template: substituted text is never re-scanned, so
  // a literal "$3" inside the Markdown source stays as it is.
  expanded = replacement.replace(/\$(L?)(\d+)/g, function(token, wantsLength, index) {
    var n = Number(index), value;

    if(n > groupCount) {
      return token;
    }

    value = matchList[n] == null ? "" : String(matchList[n]);
    return wantsLength ? String(value.length) : value;
  });

  if(type === BLOCK) {
    expanded = expanded.trim() + "\n";
  }

  return expanded;
}

/**
 * Post-processes generated HTML: joins consecutive single-item <ul>/<ol>
 * lists into one list and consecutive blockquotes into one blockquote.
 *
 * @param {string} string HTML produced by the parse rules
 * @return {string} Cleaned HTML
 */
function clean(string) {
  var cleaningRuleArray = [
    {
      // "</ul> <ul>" or "</ol> <ol>" => nothing (lists are merged).
      match: /<\/([uo]l)>\s*<\1>/g,
      replacement: ""
    },
    {
      // "</p></blockquote> <blockquote>" => "</p>" (quotes are merged).
      match: /(<\/\w+>)<\/(blockquote)>\s*<\2>/g,
      replacement: "$1"
    }
  ];

  cleaningRuleArray.forEach(function(rule) {
    string = string.replace(rule.match, rule.replacement);
  });

  return string;
}

})();
src/docker.mts ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Dockerfile template, exported as a string, for a Next.js project.
 *
 * It is a three-stage build on `node:18-alpine`:
 *  - `deps` installs the npm dependencies,
 *  - `builder` copies the sources and runs `npm run build`,
 *  - `runner` copies the build output and starts `node server.js` as the
 *    unprivileged `nextjs` user on port 3000.
 *
 * Backslashes that must reach the Dockerfile are written as `\\`: inside a
 * template literal a lone `\` followed by a newline is a line continuation
 * and would silently join the two lines.
 *
 * NOTE(review): the runner stage copies `.next/standalone`, which presumably
 * requires `output: 'standalone'` in the project's next.config.js — confirm
 * that the generated projects set it.
 */
export const dockerfile = `
FROM node:18-alpine AS base

# Install dependencies only when needed
FROM base AS deps
# Check https://github.com/nodejs/docker-node/tree/b4117f9333da4138b03a546ec926ef50a31506c3#nodealpine to understand why libc6-compat might be needed.
RUN apk add --no-cache libc6-compat
WORKDIR /app

# Install dependencies based on the preferred package manager
COPY package.json package-lock.json* ./
RUN npm install

# Uncomment the following lines if you want to use a secret at buildtime,
# for example to access your private npm packages
# RUN --mount=type=secret,id=HF_EXAMPLE_SECRET,mode=0444,required=true \\
# $(cat /run/secrets/HF_EXAMPLE_SECRET)

# Rebuild the source code only when needed
FROM base AS builder
WORKDIR /app
COPY --from=deps /app/node_modules ./node_modules
COPY . .

# Next.js collects completely anonymous telemetry data about general usage.
# Learn more here: https://nextjs.org/telemetry
# Uncomment the following line in case you want to disable telemetry during the build.
# ENV NEXT_TELEMETRY_DISABLED=1

RUN npm run build

# Production image, copy all the files and run next
FROM base AS runner
WORKDIR /app

ENV NODE_ENV=production
# Uncomment the following line in case you want to disable telemetry during runtime.
# ENV NEXT_TELEMETRY_DISABLED=1

RUN addgroup --system --gid 1001 nodejs
RUN adduser --system --uid 1001 nextjs

COPY --from=builder /app/public ./public

# Automatically leverage output traces to reduce image size
# https://nextjs.org/docs/advanced-features/output-file-tracing
COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./
COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static
COPY --from=builder --chown=nextjs:nodejs /app/.next/cache ./.next/cache
# COPY --from=builder --chown=nextjs:nodejs /app/.next/cache/fetch-cache ./.next/cache/fetch-cache

USER nextjs

EXPOSE 3000

ENV PORT=3000

CMD ["node", "server.js"]
`
src/generateFiles.mts CHANGED
@@ -8,12 +8,16 @@ import { getReactApp } from './getReactApp.mts'
8
  import { isPythonAppPrompt } from './isPythonAppPrompt.mts'
9
  import { isReactAppPrompt } from './isReactAppPrompt.mts'
10
 
11
- export const generateFiles = async (prompt: string, token: string) => {
 
 
 
 
12
  if (`${prompt}`.length < 2) {
13
  throw new Error(`prompt too short, please enter at least ${prompt} characters`)
14
  }
15
 
16
- const { prefix, instructions } =
17
  isPythonAppPrompt(prompt)
18
  ? getPythonApp(prompt)
19
  : isReactAppPrompt(prompt)
@@ -22,11 +26,15 @@ export const generateFiles = async (prompt: string, token: string) => {
22
 
23
  const inputs = createLlamaPrompt(instructions) + "\nSure! Here are the source files:\n" + prefix
24
 
25
- let tutorial = prefix
 
 
26
 
27
  try {
28
  const hf = new HfInference(token)
29
 
 
 
30
  for await (const output of hf.textGenerationStream({
31
  // model: "tiiuae/falcon-180B-chat",
32
  model: "codellama/CodeLlama-34b-Instruct-hf",
@@ -49,25 +57,39 @@ let tutorial = prefix
49
  tutorial += output.token.text
50
  process.stdout.write(output.token.text)
51
  // res.write(output.token.text)
52
- if (tutorial.includes('<|end|>')
 
 
53
  || tutorial.includes('[ENDINSTRUCTION]')
54
  || tutorial.includes('[/TASK]')
55
  || tutorial.includes('<|assistant|>')) {
 
 
 
 
 
 
56
  break
57
  }
58
  }
59
 
60
  } catch (e) {
 
61
  console.log("failed:")
62
  console.log(e)
63
  }
64
 
 
 
 
 
 
65
  console.log("analyzing the generated instructions..")
66
- const files = parseTutorial(tutorial).map(({ filename, content }) => ({
67
- path: `${filename || ""}`.trim().replace(" ", ""),
68
  content: `${content || ""}`
69
  } as RepoFile))
70
  .filter(res => res.path.length && res.content.length)
71
 
72
- return files
73
  }
 
8
  import { isPythonAppPrompt } from './isPythonAppPrompt.mts'
9
  import { isReactAppPrompt } from './isReactAppPrompt.mts'
10
 
11
+ export const generateFiles = async (
12
+ prompt: string,
13
+ token: string,
14
+ onProgress: (chunk: string) => boolean
15
+ ) => {
16
  if (`${prompt}`.length < 2) {
17
  throw new Error(`prompt too short, please enter at least ${prompt} characters`)
18
  }
19
 
20
+ const { prefix, files, instructions } =
21
  isPythonAppPrompt(prompt)
22
  ? getPythonApp(prompt)
23
  : isReactAppPrompt(prompt)
 
26
 
27
  const inputs = createLlamaPrompt(instructions) + "\nSure! Here are the source files:\n" + prefix
28
 
29
+ let isAbortedOrFailed = false
30
+
31
+ let tutorial = prefix
32
 
33
  try {
34
  const hf = new HfInference(token)
35
 
36
+ onProgress(prefix)
37
+
38
  for await (const output of hf.textGenerationStream({
39
  // model: "tiiuae/falcon-180B-chat",
40
  model: "codellama/CodeLlama-34b-Instruct-hf",
 
57
  tutorial += output.token.text
58
  process.stdout.write(output.token.text)
59
  // res.write(output.token.text)
60
+ if (
61
+ tutorial.includes('<|end|>')
62
+ || tutorial.includes('</s>')
63
  || tutorial.includes('[ENDINSTRUCTION]')
64
  || tutorial.includes('[/TASK]')
65
  || tutorial.includes('<|assistant|>')) {
66
+ tutorial = tutorial.replaceAll("</s>", "").replaceAll("<|end|>", "")
67
+ break
68
+ }
69
+ if (!onProgress(output.token.text)) {
70
+ console.log("aborting the LLM generation")
71
+ isAbortedOrFailed = true
72
  break
73
  }
74
  }
75
 
76
  } catch (e) {
77
+ isAbortedOrFailed = true
78
  console.log("failed:")
79
  console.log(e)
80
  }
81
 
82
+ if (isAbortedOrFailed) {
83
+ console.log("the request was aborted, so we return an empty list")
84
+ return []
85
+ }
86
+
87
  console.log("analyzing the generated instructions..")
88
+ const generatedFiles = parseTutorial(tutorial).map(({ filename, content }) => ({
89
+ path: `${filename || ""}`.trim().replaceAll(" ", ""),
90
  content: `${content || ""}`
91
  } as RepoFile))
92
  .filter(res => res.path.length && res.content.length)
93
 
94
+ return [...generatedFiles, ...files]
95
  }
src/getPythonApp.mts CHANGED
@@ -37,5 +37,5 @@ The app is about: ${prompt}`,
37
  }
38
  ]
39
 
40
- return { prefix, instructions }
41
  }
 
37
  }
38
  ]
39
 
40
+ return { prefix, files: [], instructions }
41
  }
src/getReactApp.mts CHANGED
@@ -1,8 +1,15 @@
1
  import { alpine } from "./alpine.mts"
2
  import { daisy } from "./daisy.mts"
 
3
 
4
  export function getReactApp(prompt: string) {
5
- const prefix = `# In src/main.tsx:\n\`\`\``
 
 
 
 
 
 
6
  const instructions = [
7
  {
8
  role: "system",
@@ -12,34 +19,14 @@ export function getReactApp(prompt: string) {
12
  },
13
  {
14
  role: "user",
15
- content: `Please write, file by file, the source code for a Next 12 application.
16
-
17
- The app should be buildable when we call:
18
-
19
  \`\`\`
20
  npm install
21
  npm run start
22
  \`\`\`
23
 
24
- And installable using a Dockerfile. Here is an example:
25
-
26
- \`\`\`
27
- FROM node:18
28
- RUN useradd -o -u 1000 user
29
- USER user
30
- ENV HOME=/home/user \
31
- PATH=/home/user/.local/bin:$PATH
32
- WORKDIR $HOME/app
33
- COPY --chown=user package*.json $HOME/app
34
- RUN npm install
35
- COPY --chown=user . $HOME/app
36
- EXPOSE 7860
37
- CMD [ "npm", "run", "start" ]
38
- \`\`\`
39
-
40
- Don't forget to write a valid package.json file!
41
-
42
- Don't forget to write a README.md with the following header:
43
  \`\`\`
44
  ---
45
  license: apache-2.0
@@ -51,11 +38,15 @@ colorTo: green
51
  ---
52
  \`\`\`
53
 
54
- Of course, you MUST replace <APPNAME> with a good app name!
 
 
 
 
55
 
56
- The app is about: ${prompt}`,
57
  }
58
  ]
59
 
60
- return { prefix, instructions }
61
  }
 
1
  import { alpine } from "./alpine.mts"
2
  import { daisy } from "./daisy.mts"
3
+ import { dockerfile } from "./docker.mts"
4
 
5
  export function getReactApp(prompt: string) {
6
+ const prefix = `# In src/pages/index.tsx:\n\`\`\``
7
+ const files = [
8
+ {
9
+ path: `Dockerfile`,
10
+ content: dockerfile,
11
+ }
12
+ ]
13
  const instructions = [
14
  {
15
  role: "system",
 
19
  },
20
  {
21
  role: "user",
22
+ content: `Think step by step, you got this! Please write, file by file, the source code for a Next 12 application.
23
+ The app should be buildable when we run this in command line:
 
 
24
  \`\`\`
25
  npm install
26
  npm run start
27
  \`\`\`
28
 
29
+ The project will be deployed to Hugging Face, so it must include a README.md with the following YAML header:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  \`\`\`
31
  ---
32
  license: apache-2.0
 
38
  ---
39
  \`\`\`
40
 
41
+ Important rules:
42
+ - you need to leave: "sdk: docker" as-is, but replace: "<APPNAME>" with an actual name, please.
43
+ - Don't forget to write a valid package.json file!
44
+
45
+ The app is about: ${prompt}.
46
 
47
+ Remember: don't forget to edit the README.md and a package.json file!`,
48
  }
49
  ]
50
 
51
+ return { prefix, files, instructions }
52
  }
src/getWebApp.mts CHANGED
@@ -49,5 +49,5 @@ The app is about: ${prompt}`,
49
  }
50
  ]
51
 
52
- return { prefix, instructions }
53
  }
 
49
  }
50
  ]
51
 
52
+ return { prefix, files: [], instructions }
53
  }
src/index.mts CHANGED
@@ -53,12 +53,33 @@ app.get('/app', async (req, res) => {
53
  return
54
  }
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  const id = `${pending.total++}`
57
  console.log(`new request ${id}`)
58
 
59
  pending.queue.push(id)
60
 
61
-
62
  req.on('close', function() {
63
  endRequest(id, 'browser asked to end the connection')
64
  })
@@ -72,18 +93,31 @@ app.get('/app', async (req, res) => {
72
  let files = []
73
 
74
  while (nbAttempts-- > 0) {
75
- files = await generateFiles(`${req.query.prompt || ""}`, token)
 
 
 
 
 
 
 
 
 
 
76
  if (files.length) {
77
  console.log(`seems like we have ${files.length} files`)
78
  break
79
  }
80
  }
81
 
82
- console.log("files:", JSON.stringify(files, null, 2))
 
83
 
84
- await createSpace(files, token)
 
85
 
86
- res.write(JSON.stringify(files, null, 2))
 
87
  res.end()
88
  })
89
 
 
53
  return
54
  }
55
 
56
+ /*
57
+ res.write(`<!doctype html>
58
+ <script src="/markdown-to-html.js"></script>
59
+ <div id="formatted-markdown"></div>
60
+ <script>
61
+ setInterval(
62
+ function fn() {
63
+ try {
64
+ var input = document.getElementById("raw-markdown-stream")
65
+ var output = document.getElementById("formatted-markdown")
66
+ output.innerHTML = MarkdownToHtml.parse(input.innerHTML)
67
+ } catch (err) {
68
+ console.error(err)
69
+ }
70
+ },
71
+ 1000
72
+ )
73
+ </script>
74
+ <div id="raw-markdown-stream" style="display: none">
75
+ `)
76
+ */
77
+
78
  const id = `${pending.total++}`
79
  console.log(`new request ${id}`)
80
 
81
  pending.queue.push(id)
82
 
 
83
  req.on('close', function() {
84
  endRequest(id, 'browser asked to end the connection')
85
  })
 
93
  let files = []
94
 
95
  while (nbAttempts-- > 0) {
96
+ files = await generateFiles(
97
+ `${req.query.prompt || ""}`,
98
+ token,
99
+ (chunk: string) => {
100
+ res.write(chunk)
101
+
102
+ // return true here as long as our request is still valid
103
+ // but if the user disconnected, the id will be removed from the queue,
104
+ // and we will return false, indicating to generateFiles that we should abort
105
+ return pending.queue.includes(id)
106
+ })
107
  if (files.length) {
108
  console.log(`seems like we have ${files.length} files`)
109
  break
110
  }
111
  }
112
 
113
+ if (files.length > 0) {
114
+ console.log("files:", JSON.stringify(files, null, 2))
115
 
116
+ await createSpace(files, token)
117
+ }
118
 
119
+ // res.write(JSON.stringify(files, null, 2))
120
+ // res.write(`</div>`)
121
  res.end()
122
  })
123
 
tsconfig.json CHANGED
@@ -6,7 +6,7 @@
6
  "module": "nodenext",
7
  "noEmit": true,
8
  "allowImportingTsExtensions": true,
9
- "target": "es2017"
10
  },
11
  "include": ["**/*.ts", "**/*.mts"],
12
  }
 
6
  "module": "nodenext",
7
  "noEmit": true,
8
  "allowImportingTsExtensions": true,
9
+ "target": "es2022"
10
  },
11
  "include": ["**/*.ts", "**/*.mts"],
12
  }