radames committed on
Commit
16f1f6d
1 Parent(s): 9d3a993

Upload 6 files

Browse files
Files changed (6) hide show
  1. blipWorker.js +78 -0
  2. build/m.d.ts +55 -0
  3. build/m.js +329 -0
  4. build/m_bg.wasm +3 -0
  5. build/m_bg.wasm.d.ts +12 -0
  6. index.html +390 -16
blipWorker.js ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // Load the Candle BLIP wasm module
2
+ import init, { Model } from "./build/m.js";
3
+
4
/**
 * Download `url` and return the response body as a `Uint8Array`.
 *
 * When `cacheFile` is true the bytes are served from (and stored into) the
 * "blip-candle-cache" Cache Storage bucket, so large model files are only
 * downloaded once.
 *
 * @param {string} url - Resource to download.
 * @param {boolean} [cacheFile=true] - Whether to read/write the Cache Storage bucket.
 * @returns {Promise<Uint8Array>} The response body.
 * @throws {Error} If the server answers with a non-2xx status.
 */
async function fetchArrayBuffer(url, cacheFile = true) {
  // `fetch` only rejects on network failure; an HTTP error page must be
  // rejected explicitly or it would be handed to the model as if it were data.
  const ensureOk = (response) => {
    if (!response.ok) {
      throw new Error(
        `Failed to fetch ${url}: ${response.status} ${response.statusText}`
      );
    }
    return response;
  };

  if (!cacheFile) {
    const direct = ensureOk(await fetch(url));
    return new Uint8Array(await direct.arrayBuffer());
  }

  const cacheName = "blip-candle-cache";
  const cache = await caches.open(cacheName);
  const cachedResponse = await cache.match(url);
  if (cachedResponse) {
    return new Uint8Array(await cachedResponse.arrayBuffer());
  }

  // Validate before caching: `cache.put` stores error responses without
  // complaint, which would poison every later load of this URL.
  const res = ensureOk(await fetch(url, { cache: "force-cache" }));

  // The clone has to be taken before the body of `res` is consumed.
  const copy = res.clone();
  // Caching is best-effort: a failure to store (for example, not enough
  // storage) must not prevent the caller from getting the downloaded bytes.
  const stored = cache.put(url, copy).catch((err) => {
    console.warn(`Could not cache ${url}:`, err);
  });

  const bytes = new Uint8Array(await res.arrayBuffer());
  await stored;
  return bytes;
}
17
/**
 * Lazily creates BLIP `Model` objects and keeps one per `modelID` so that
 * repeated requests for the same model reuse it.
 */
class Blip {
  // modelID -> Promise of the loaded Model. The promise is stored as soon as
  // loading starts, so overlapping requests for the same model share a single
  // download instead of each fetching the weights again.
  static instance = {};

  /**
   * Return the model registered under `modelID`, loading it on first use.
   *
   * Posts `{ status: "loading" }` to the page when a load starts and
   * `{ status: "ready" }` when an existing entry is reused.
   *
   * @param {string} weightsURL - URL of the weights file.
   * @param {string} tokenizerURL - URL of the tokenizer file.
   * @param {string} configURL - URL of the config file.
   * @param {string} modelID - Key under which the model is cached.
   * @param {boolean} quantized - Passed through to the `Model` constructor.
   * @returns {Promise<Model>} The loaded model.
   */
  static async getInstance(
    weightsURL,
    tokenizerURL,
    configURL,
    modelID,
    quantized
  ) {
    const existing = this.instance[modelID];
    if (existing) {
      self.postMessage({ status: "ready", message: "Model Already Loaded" });
      return existing;
    }

    const loading = (async () => {
      await init();

      self.postMessage({ status: "loading", message: "Loading Model" });
      const [weightsArrayU8, tokenizerArrayU8, configArrayU8] =
        await Promise.all([
          fetchArrayBuffer(weightsURL),
          fetchArrayBuffer(tokenizerURL),
          fetchArrayBuffer(configURL),
        ]);

      return new Model(
        weightsArrayU8,
        tokenizerArrayU8,
        configArrayU8,
        quantized
      );
    })();
    this.instance[modelID] = loading;

    try {
      return await loading;
    } catch (err) {
      // Forget the failed attempt so that a later request can retry instead of
      // receiving the same rejection forever.
      if (this.instance[modelID] === loading) {
        delete this.instance[modelID];
      }
      throw err;
    }
  }
}
50
+
51
/**
 * Worker entry point. Each incoming message describes one captioning request
 * (`weightsURL`, `tokenizerURL`, `configURL`, `modelID`, `imageURL`,
 * `quantized`). Progress is reported with `{ status: "status" }` messages, the
 * caption with `{ status: "complete", output }`, and failures with `{ error }`.
 */
self.addEventListener("message", async (event) => {
  const { weightsURL, tokenizerURL, configURL, modelID, imageURL, quantized } =
    event.data;
  try {
    self.postMessage({ status: "status", message: "Loading Blip Model..." });
    const model = await Blip.getInstance(
      weightsURL,
      tokenizerURL,
      configURL,
      modelID,
      quantized
    );
    self.postMessage({
      status: "status",
      message: "Running Blip Inference...",
    });
    // The image is fetched without the cache: it changes on every request.
    const imageArrayU8 = await fetchArrayBuffer(imageURL, false);
    const output = model.generate_caption_from_image(imageArrayU8);

    self.postMessage({
      status: "complete",
      message: "complete",
      output: output,
    });
  } catch (e) {
    // Post a plain string rather than the caught value itself: `postMessage`
    // throws if the value cannot be structured-cloned, and that second failure
    // would leave the page waiting forever for a reply.
    const message = e instanceof Error ? e.message : String(e);
    self.postMessage({ error: message });
  }
});
build/m.d.ts ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ /**
4
+ */
5
+ export class Model {
6
+ free(): void;
7
+ /**
8
+ * @param {Uint8Array} weights
9
+ * @param {Uint8Array} tokenizer
10
+ * @param {Uint8Array} config
11
+ * @param {boolean} quantized
12
+ */
13
+ constructor(weights: Uint8Array, tokenizer: Uint8Array, config: Uint8Array, quantized: boolean);
14
+ /**
15
+ * @param {Uint8Array} image
16
+ * @returns {string}
17
+ */
18
+ generate_caption_from_image(image: Uint8Array): string;
19
+ }
20
+
21
+ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
22
+
23
+ export interface InitOutput {
24
+ readonly memory: WebAssembly.Memory;
25
+ readonly __wbg_model_free: (a: number) => void;
26
+ readonly model_load: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
27
+ readonly model_generate_caption_from_image: (a: number, b: number, c: number, d: number) => void;
28
+ readonly main: (a: number, b: number) => number;
29
+ readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
30
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
31
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
32
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
33
+ readonly __wbindgen_start: () => void;
34
+ }
35
+
36
+ export type SyncInitInput = BufferSource | WebAssembly.Module;
37
+ /**
38
+ * Instantiates the given `module`, which can either be bytes or
39
+ * a precompiled `WebAssembly.Module`.
40
+ *
41
+ * @param {SyncInitInput} module
42
+ *
43
+ * @returns {InitOutput}
44
+ */
45
+ export function initSync(module: SyncInitInput): InitOutput;
46
+
47
+ /**
48
+ * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
49
+ * for everything else, calls `WebAssembly.instantiate` directly.
50
+ *
51
+ * @param {InitInput | Promise<InitInput>} module_or_path
52
+ *
53
+ * @returns {Promise<InitOutput>}
54
+ */
55
+ export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>): Promise<InitOutput>;
build/m.js ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ let wasm;
2
+
3
+ const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
4
+
5
+ if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
6
+
7
+ let cachedUint8Memory0 = null;
8
+
9
+ function getUint8Memory0() {
10
+ if (cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0) {
11
+ cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer);
12
+ }
13
+ return cachedUint8Memory0;
14
+ }
15
+
16
+ function getStringFromWasm0(ptr, len) {
17
+ ptr = ptr >>> 0;
18
+ return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len));
19
+ }
20
+
21
+ const heap = new Array(128).fill(undefined);
22
+
23
+ heap.push(undefined, null, true, false);
24
+
25
+ let heap_next = heap.length;
26
+
27
+ function addHeapObject(obj) {
28
+ if (heap_next === heap.length) heap.push(heap.length + 1);
29
+ const idx = heap_next;
30
+ heap_next = heap[idx];
31
+
32
+ heap[idx] = obj;
33
+ return idx;
34
+ }
35
+
36
+ function getObject(idx) { return heap[idx]; }
37
+
38
+ function dropObject(idx) {
39
+ if (idx < 132) return;
40
+ heap[idx] = heap_next;
41
+ heap_next = idx;
42
+ }
43
+
44
+ function takeObject(idx) {
45
+ const ret = getObject(idx);
46
+ dropObject(idx);
47
+ return ret;
48
+ }
49
+
50
+ let WASM_VECTOR_LEN = 0;
51
+
52
+ function passArray8ToWasm0(arg, malloc) {
53
+ const ptr = malloc(arg.length * 1, 1) >>> 0;
54
+ getUint8Memory0().set(arg, ptr / 1);
55
+ WASM_VECTOR_LEN = arg.length;
56
+ return ptr;
57
+ }
58
+
59
+ let cachedInt32Memory0 = null;
60
+
61
+ function getInt32Memory0() {
62
+ if (cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0) {
63
+ cachedInt32Memory0 = new Int32Array(wasm.memory.buffer);
64
+ }
65
+ return cachedInt32Memory0;
66
+ }
67
+
68
+ const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
69
+
70
+ const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
71
+ ? function (arg, view) {
72
+ return cachedTextEncoder.encodeInto(arg, view);
73
+ }
74
+ : function (arg, view) {
75
+ const buf = cachedTextEncoder.encode(arg);
76
+ view.set(buf);
77
+ return {
78
+ read: arg.length,
79
+ written: buf.length
80
+ };
81
+ });
82
+
83
+ function passStringToWasm0(arg, malloc, realloc) {
84
+
85
+ if (realloc === undefined) {
86
+ const buf = cachedTextEncoder.encode(arg);
87
+ const ptr = malloc(buf.length, 1) >>> 0;
88
+ getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf);
89
+ WASM_VECTOR_LEN = buf.length;
90
+ return ptr;
91
+ }
92
+
93
+ let len = arg.length;
94
+ let ptr = malloc(len, 1) >>> 0;
95
+
96
+ const mem = getUint8Memory0();
97
+
98
+ let offset = 0;
99
+
100
+ for (; offset < len; offset++) {
101
+ const code = arg.charCodeAt(offset);
102
+ if (code > 0x7F) break;
103
+ mem[ptr + offset] = code;
104
+ }
105
+
106
+ if (offset !== len) {
107
+ if (offset !== 0) {
108
+ arg = arg.slice(offset);
109
+ }
110
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
111
+ const view = getUint8Memory0().subarray(ptr + offset, ptr + len);
112
+ const ret = encodeString(arg, view);
113
+
114
+ offset += ret.written;
115
+ }
116
+
117
+ WASM_VECTOR_LEN = offset;
118
+ return ptr;
119
+ }
120
+ /**
121
+ */
122
+ export class Model {
123
+
124
+ static __wrap(ptr) {
125
+ ptr = ptr >>> 0;
126
+ const obj = Object.create(Model.prototype);
127
+ obj.__wbg_ptr = ptr;
128
+
129
+ return obj;
130
+ }
131
+
132
+ __destroy_into_raw() {
133
+ const ptr = this.__wbg_ptr;
134
+ this.__wbg_ptr = 0;
135
+
136
+ return ptr;
137
+ }
138
+
139
+ free() {
140
+ const ptr = this.__destroy_into_raw();
141
+ wasm.__wbg_model_free(ptr);
142
+ }
143
+ /**
144
+ * @param {Uint8Array} weights
145
+ * @param {Uint8Array} tokenizer
146
+ * @param {Uint8Array} config
147
+ * @param {boolean} quantized
148
+ */
149
+ constructor(weights, tokenizer, config, quantized) {
150
+ try {
151
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
152
+ const ptr0 = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
153
+ const len0 = WASM_VECTOR_LEN;
154
+ const ptr1 = passArray8ToWasm0(tokenizer, wasm.__wbindgen_malloc);
155
+ const len1 = WASM_VECTOR_LEN;
156
+ const ptr2 = passArray8ToWasm0(config, wasm.__wbindgen_malloc);
157
+ const len2 = WASM_VECTOR_LEN;
158
+ wasm.model_load(retptr, ptr0, len0, ptr1, len1, ptr2, len2, quantized);
159
+ var r0 = getInt32Memory0()[retptr / 4 + 0];
160
+ var r1 = getInt32Memory0()[retptr / 4 + 1];
161
+ var r2 = getInt32Memory0()[retptr / 4 + 2];
162
+ if (r2) {
163
+ throw takeObject(r1);
164
+ }
165
+ return Model.__wrap(r0);
166
+ } finally {
167
+ wasm.__wbindgen_add_to_stack_pointer(16);
168
+ }
169
+ }
170
+ /**
171
+ * @param {Uint8Array} image
172
+ * @returns {string}
173
+ */
174
+ generate_caption_from_image(image) {
175
+ let deferred3_0;
176
+ let deferred3_1;
177
+ try {
178
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
179
+ const ptr0 = passArray8ToWasm0(image, wasm.__wbindgen_malloc);
180
+ const len0 = WASM_VECTOR_LEN;
181
+ wasm.model_generate_caption_from_image(retptr, this.__wbg_ptr, ptr0, len0);
182
+ var r0 = getInt32Memory0()[retptr / 4 + 0];
183
+ var r1 = getInt32Memory0()[retptr / 4 + 1];
184
+ var r2 = getInt32Memory0()[retptr / 4 + 2];
185
+ var r3 = getInt32Memory0()[retptr / 4 + 3];
186
+ var ptr2 = r0;
187
+ var len2 = r1;
188
+ if (r3) {
189
+ ptr2 = 0; len2 = 0;
190
+ throw takeObject(r2);
191
+ }
192
+ deferred3_0 = ptr2;
193
+ deferred3_1 = len2;
194
+ return getStringFromWasm0(ptr2, len2);
195
+ } finally {
196
+ wasm.__wbindgen_add_to_stack_pointer(16);
197
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
198
+ }
199
+ }
200
+ }
201
+
202
+ async function __wbg_load(module, imports) {
203
+ if (typeof Response === 'function' && module instanceof Response) {
204
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
205
+ try {
206
+ return await WebAssembly.instantiateStreaming(module, imports);
207
+
208
+ } catch (e) {
209
+ if (module.headers.get('Content-Type') != 'application/wasm') {
210
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
211
+
212
+ } else {
213
+ throw e;
214
+ }
215
+ }
216
+ }
217
+
218
+ const bytes = await module.arrayBuffer();
219
+ return await WebAssembly.instantiate(bytes, imports);
220
+
221
+ } else {
222
+ const instance = await WebAssembly.instantiate(module, imports);
223
+
224
+ if (instance instanceof WebAssembly.Instance) {
225
+ return { instance, module };
226
+
227
+ } else {
228
+ return instance;
229
+ }
230
+ }
231
+ }
232
+
233
+ function __wbg_get_imports() {
234
+ const imports = {};
235
+ imports.wbg = {};
236
+ imports.wbg.__wbindgen_error_new = function(arg0, arg1) {
237
+ const ret = new Error(getStringFromWasm0(arg0, arg1));
238
+ return addHeapObject(ret);
239
+ };
240
+ imports.wbg.__wbg_new_abda76e883ba8a5f = function() {
241
+ const ret = new Error();
242
+ return addHeapObject(ret);
243
+ };
244
+ imports.wbg.__wbg_stack_658279fe44541cf6 = function(arg0, arg1) {
245
+ const ret = getObject(arg1).stack;
246
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
247
+ const len1 = WASM_VECTOR_LEN;
248
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
249
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
250
+ };
251
+ imports.wbg.__wbg_error_f851667af71bcfc6 = function(arg0, arg1) {
252
+ let deferred0_0;
253
+ let deferred0_1;
254
+ try {
255
+ deferred0_0 = arg0;
256
+ deferred0_1 = arg1;
257
+ console.error(getStringFromWasm0(arg0, arg1));
258
+ } finally {
259
+ wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
260
+ }
261
+ };
262
+ imports.wbg.__wbindgen_object_drop_ref = function(arg0) {
263
+ takeObject(arg0);
264
+ };
265
+ imports.wbg.__wbg_log_751f7b214d870e8a = function(arg0, arg1) {
266
+ console.log(getStringFromWasm0(arg0, arg1));
267
+ };
268
+ imports.wbg.__wbg_now_9c5990bda04c7e53 = function() {
269
+ const ret = Date.now();
270
+ return ret;
271
+ };
272
+ imports.wbg.__wbindgen_throw = function(arg0, arg1) {
273
+ throw new Error(getStringFromWasm0(arg0, arg1));
274
+ };
275
+
276
+ return imports;
277
+ }
278
+
279
+ function __wbg_init_memory(imports, maybe_memory) {
280
+
281
+ }
282
+
283
+ function __wbg_finalize_init(instance, module) {
284
+ wasm = instance.exports;
285
+ __wbg_init.__wbindgen_wasm_module = module;
286
+ cachedInt32Memory0 = null;
287
+ cachedUint8Memory0 = null;
288
+
289
+ wasm.__wbindgen_start();
290
+ return wasm;
291
+ }
292
+
293
+ function initSync(module) {
294
+ if (wasm !== undefined) return wasm;
295
+
296
+ const imports = __wbg_get_imports();
297
+
298
+ __wbg_init_memory(imports);
299
+
300
+ if (!(module instanceof WebAssembly.Module)) {
301
+ module = new WebAssembly.Module(module);
302
+ }
303
+
304
+ const instance = new WebAssembly.Instance(module, imports);
305
+
306
+ return __wbg_finalize_init(instance, module);
307
+ }
308
+
309
+ async function __wbg_init(input) {
310
+ if (wasm !== undefined) return wasm;
311
+
312
+ if (typeof input === 'undefined') {
313
+ input = new URL('m_bg.wasm', import.meta.url);
314
+ }
315
+ const imports = __wbg_get_imports();
316
+
317
+ if (typeof input === 'string' || (typeof Request === 'function' && input instanceof Request) || (typeof URL === 'function' && input instanceof URL)) {
318
+ input = fetch(input);
319
+ }
320
+
321
+ __wbg_init_memory(imports);
322
+
323
+ const { instance, module } = await __wbg_load(await input, imports);
324
+
325
+ return __wbg_finalize_init(instance, module);
326
+ }
327
+
328
+ export { initSync }
329
+ export default __wbg_init;
build/m_bg.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a0b451b714b0da374d5b574572f4847c3b4fe20cd1f46b1d236bac1125f084b
3
+ size 4523850
build/m_bg.wasm.d.ts ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export function __wbg_model_free(a: number): void;
5
+ export function model_load(a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number): void;
6
+ export function model_generate_caption_from_image(a: number, b: number, c: number, d: number): void;
7
+ export function main(a: number, b: number): number;
8
+ export function __wbindgen_add_to_stack_pointer(a: number): number;
9
+ export function __wbindgen_malloc(a: number, b: number): number;
10
+ export function __wbindgen_free(a: number, b: number, c: number): void;
11
+ export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
12
+ export function __wbindgen_start(): void;
index.html CHANGED
@@ -1,19 +1,393 @@
1
  <!DOCTYPE html>
2
  <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
  <!DOCTYPE html>
2
  <html>
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <style>
7
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
8
+ html,
9
+ body {
10
+ font-family: "Source Sans 3", sans-serif;
11
+ }
12
+ </style>
13
+ <title>Candle Blip Image Captioning Demo</title>
14
+ <script src="https://cdn.tailwindcss.com"></script>
15
+ <script type="module" src="./code.js"></script>
16
+ <script type="module">
17
+ const MODELS = {
18
+ blip_image_quantized_q4k: {
19
+ base_url: "https://huggingface.co/lmz/candle-blip/resolve/main/",
20
+ model: "blip-image-captioning-large-q4k.gguf",
21
+ config: "config.json",
22
+ tokenizer: "tokenizer.json",
23
+ quantized: true,
24
+ size: "271 MB",
25
+ },
26
+ blip_image_quantized_q80: {
27
+ base_url: "https://huggingface.co/lmz/candle-blip/resolve/main/",
28
+ model: "blip-image-captioning-large-q80.gguf",
29
+ config: "config.json",
30
+ tokenizer: "tokenizer.json",
31
+ quantized: true,
32
+ size: "505 MB",
33
+ },
34
+ blip_image_large: {
35
+ base_url:
36
+ "https://huggingface.co/Salesforce/blip-image-captioning-large/resolve/refs%2Fpr%2F18/",
37
+ model: "model.safetensors",
38
+ config: "config.json",
39
+ tokenizer: "tokenizer.json",
40
+ quantized: false,
41
+ size: "1.88 GB",
42
+ },
43
+ };
44
+
45
+ const blipWorker = new Worker("./blipWorker.js", {
46
+ type: "module",
47
+ });
48
+
49
+ const outputStatusEl = document.querySelector("#output-status");
50
+ const outputCaptionEl = document.querySelector("#output-caption");
51
+ const modelSelectEl = document.querySelector("#model");
52
+ const clearBtn = document.querySelector("#clear-btn");
53
+ const fileUpload = document.querySelector("#file-upload");
54
+ const dropArea = document.querySelector("#drop-area");
55
+ const imagesExamples = document.querySelector("#image-select");
56
+ const canvas = document.querySelector("#canvas");
57
+ const ctxCanvas = canvas.getContext("2d");
58
+
59
+ let isCaptioning = false;
60
+ let currentImageURL = null;
61
// The clear button resets the preview and the output panel.
clearBtn.addEventListener("click", () => void clearImageCanvas());

// Picking another model re-captions the image that is currently shown, if any.
modelSelectEl.addEventListener("change", () => {
  if (!currentImageURL) {
    return;
  }
  runInference(currentImageURL);
});
69
+
70
+ //add event listener to file input
71
fileUpload.addEventListener("input", async (e) => {
  const target = e.target;
  if (isCaptioning) {
    // Same guard as the example-image click handler: starting a second
    // request while one is running would let both resolve with whichever
    // result arrives first. Reset the input so the same file can be picked
    // again once the current run has finished.
    target.value = "";
    return;
  }
  if (target.files.length === 0) {
    return;
  }

  const href = URL.createObjectURL(target.files[0]);
  clearImageCanvas();
  try {
    await drawImageCanvas(href);
  } catch (err) {
    // Show the problem in the status line instead of leaving an unhandled
    // promise rejection.
    console.error(err);
    outputStatusEl.innerText = err.message;
    return;
  }
  runInference(href);
});
80
+ // add event listener to drop-area
81
// Highlight the drop area while something is dragged over it; `highlighted`
// says whether the border class should be present after the event.
for (const [eventName, highlighted] of [
  ["dragenter", true],
  ["dragleave", false],
]) {
  dropArea.addEventListener(eventName, (evt) => {
    evt.preventDefault();
    dropArea.classList.toggle("border-blue-700", highlighted);
  });
}

// Cancelling dragover is what allows the element to receive a drop.
dropArea.addEventListener("dragover", (evt) => {
  evt.preventDefault();
});
92
// Accept either a dropped file or a dropped URL, preview it, then caption it.
dropArea.addEventListener("drop", async (e) => {
  e.preventDefault();
  dropArea.classList.remove("border-blue-700");
  if (isCaptioning) {
    // Same guard as the example-image click handler: do not start a second
    // request while one is still running.
    return;
  }

  const url = e.dataTransfer.getData("text/uri-list");
  const files = e.dataTransfer.files;

  // A dropped file takes precedence over a dropped link.
  let href = null;
  if (files.length > 0) {
    href = URL.createObjectURL(files[0]);
  } else if (url) {
    href = url;
  }
  if (!href) {
    return;
  }

  clearImageCanvas();
  try {
    await drawImageCanvas(href);
  } catch (err) {
    // Show the problem in the status line instead of leaving an unhandled
    // promise rejection.
    console.error(err);
    outputStatusEl.innerText = err.message;
    return;
  }
  runInference(href);
});
109
+
110
// Clicking one of the example thumbnails previews and captions that image.
imagesExamples.addEventListener("click", async (evt) => {
  // Ignore clicks while a caption is being generated.
  if (isCaptioning) return;

  // The listener sits on the container, so filter out clicks that did not
  // land on a thumbnail.
  const clicked = evt.target;
  if (clicked.nodeName !== "IMG") return;

  const exampleURL = clicked.src;
  clearImageCanvas();
  await drawImageCanvas(exampleURL);
  runInference(exampleURL);
});
122
/**
 * Reset the page to its "no image selected" state: wipe the canvas, clear the
 * remembered image URL and the captioning flag, disable the clear button and
 * show the prompt in the status line.
 */
function clearImageCanvas() {
  const { width: canvasWidth, height: canvasHeight } = canvas;
  ctxCanvas.clearRect(0, 0, canvasWidth, canvasHeight);

  // Module state.
  isCaptioning = false;

  // Controls and layout.
  clearBtn.disabled = true;
  canvas.parentElement.style.height = "auto";

  // Output panel: status visible, caption hidden.
  outputStatusEl.hidden = false;
  outputCaptionEl.hidden = true;
  outputStatusEl.innerText = "Please select an image";

  currentImageURL = null;
}
132
+
133
/**
 * Load `imgURL` and draw it onto the preview canvas at the image's own size.
 *
 * On success the canvas is resized to the image, its container is given the
 * canvas's rendered height, the clear button is enabled and `currentImageURL`
 * is updated.
 *
 * @param {string} imgURL - URL (or object URL) of the image to show.
 * @returns {Promise<HTMLImageElement>} Resolves with the loaded image.
 * @throws {Error} If no URL is given, or if the image cannot be loaded.
 */
async function drawImageCanvas(imgURL) {
  if (!imgURL) {
    throw new Error("No image URL provided");
  }
  return new Promise((resolve, reject) => {
    ctxCanvas.clearRect(0, 0, canvas.width, canvas.height);

    const img = new Image();
    img.crossOrigin = "anonymous";
    img.onload = () => {
      canvas.width = img.width;
      canvas.height = img.height;
      ctxCanvas.drawImage(img, 0, 0);
      canvas.parentElement.style.height = canvas.offsetHeight + "px";
      clearBtn.disabled = false;
      // Only remember URLs that actually produced an image.
      currentImageURL = imgURL;
      resolve(img);
    };
    // Without this handler a broken or blocked image would leave the promise
    // pending forever, and every caller awaiting it would hang.
    img.onerror = () => {
      reject(new Error(`Failed to load image: ${imgURL}`));
    };
    img.src = imgURL;
  });
}
155
+
156
// Fill the model <select> with one <option> per entry of MODELS, labelled
// with the model id and its download size.
document.addEventListener("DOMContentLoaded", () => {
  Object.entries(MODELS).forEach(([modelId, spec]) => {
    const optionEl = Object.assign(document.createElement("option"), {
      value: modelId,
      innerText: `${modelId} (${spec.size})`,
    });
    modelSelectEl.appendChild(optionEl);
  });
});
164
/**
 * Send one captioning request to `worker` and wait for its answer.
 *
 * @param {Worker} worker - Worker that performs the inference.
 * @param {string} weightsURL - URL of the weights file.
 * @param {string} tokenizerURL - URL of the tokenizer file.
 * @param {string} configURL - URL of the config file.
 * @param {string} modelID - Key identifying the model inside the worker.
 * @param {string} imageURL - URL of the image to caption.
 * @param {boolean} quantized - Whether the weights are quantized.
 * @param {?function(Object): void} [updateStatus=null] - Called with the data
 *   of every message received from the worker while the request is pending.
 * @returns {Promise<Object>} Resolves with the data of the worker's
 *   `status: "complete"` message; rejects with an `Error` when the worker
 *   posts a message containing an `error` field.
 */
async function getImageCaption(
  worker,
  weightsURL,
  tokenizerURL,
  configURL,
  modelID,
  imageURL,
  quantized,
  updateStatus = null
) {
  return new Promise((resolve, reject) => {
    function messageHandler(event) {
      const data = event.data;
      if ("error" in data) {
        worker.removeEventListener("message", messageHandler);
        // The worker may send a string or an Error object. Wrapping an Error
        // in `new Error(...)` would turn its message into "Error: <message>",
        // so reuse it (or its message) instead.
        const raw = data.error;
        let failure;
        if (raw instanceof Error) {
          failure = raw;
        } else if (raw && typeof raw === "object" && "message" in raw) {
          failure = new Error(String(raw.message));
        } else {
          failure = new Error(raw);
        }
        reject(failure);
      }
      if (data.status === "complete") {
        worker.removeEventListener("message", messageHandler);
        resolve(data);
      }
      if (updateStatus) updateStatus(data);
    }
    // Listen before posting so that no reply can be missed.
    worker.addEventListener("message", messageHandler);
    worker.postMessage({
      weightsURL,
      tokenizerURL,
      configURL,
      modelID,
      imageURL,
      quantized,
    });
  });
}
197
/**
 * Show a progress message from the worker in the status line. Messages whose
 * `status` is anything other than "status" are ignored.
 *
 * @param {{status: string, message: string}} data - Message data from the worker.
 */
function updateStatus(data) {
  const isProgressMessage = data.status === "status";
  if (!isProgressMessage) {
    return;
  }
  outputStatusEl.innerText = data.message;
}
202
/**
 * Caption `imageURL` with the model currently chosen in the model select and
 * show the caption (plus the elapsed time) in the output panel.
 *
 * While the request is running the clear button and the model select are
 * disabled and `isCaptioning` is true; all three are restored when the request
 * finishes, whether it succeeded or failed.
 *
 * @param {string} imageURL - URL of the image to caption.
 * @returns {Promise<void>}
 */
async function runInference(imageURL) {
  // A request is already running: ignore this one rather than showing the
  // unrelated "select an image" prompt.
  if (isCaptioning) {
    return;
  }
  if (!imageURL) {
    alert("Please select an image first");
    return;
  }

  const selectedModel = modelSelectEl.value;
  const model = MODELS[selectedModel];
  if (!model) {
    // Nothing selectable yet, or an id that is not in MODELS.
    outputStatusEl.hidden = false;
    outputCaptionEl.hidden = true;
    outputStatusEl.innerText = `Unknown model: ${selectedModel}`;
    return;
  }

  outputStatusEl.hidden = false;
  outputCaptionEl.hidden = true;
  clearBtn.disabled = true;
  modelSelectEl.disabled = true;
  isCaptioning = true;

  const weightsURL = `${model.base_url}${model.model}`;
  const tokenizerURL = `${model.base_url}${model.tokenizer}`;
  const configURL = `${model.base_url}${model.config}`;
  const quantized = model.quantized;
  try {
    const time = performance.now();
    const caption = await getImageCaption(
      blipWorker,
      weightsURL,
      tokenizerURL,
      configURL,
      selectedModel,
      imageURL,
      quantized,
      updateStatus
    );
    outputStatusEl.hidden = true;
    outputCaptionEl.hidden = false;
    const totalTime = ((performance.now() - time) / 1000).toFixed(2);

    // Build the result from DOM nodes so that the generated caption is always
    // rendered as text and can never be interpreted as markup.
    const timingEl = document.createElement("span");
    timingEl.className = "text-xs";
    timingEl.textContent = `Inference time: ${totalTime} s`;
    outputCaptionEl.textContent = caption.output;
    outputCaptionEl.append(document.createElement("br"), timingEl);
  } catch (err) {
    console.error(err);
    outputStatusEl.hidden = false;
    outputCaptionEl.hidden = true;
    outputStatusEl.innerText = err.message;
  } finally {
    // Always hand the controls back, whatever happened above.
    clearBtn.disabled = false;
    modelSelectEl.disabled = false;
    isCaptioning = false;
  }
}
247
+ </script>
248
+ </head>
249
+ <body class="container max-w-4xl mx-auto p-4">
250
+ <main class="grid grid-cols-1 gap-5 relative">
251
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
252
+ <div>
253
+ <h1 class="text-5xl font-bold">Candle BLIP Image Captioning</h1>
254
+ <h2 class="text-2xl font-bold">Rust/WASM Demo</h2>
255
+ <p class="max-w-lg">
256
+ <a
257
+ href="https://huggingface.co/Salesforce/blip-image-captioning-large"
258
+ target="_blank"
259
+ class="underline hover:text-blue-500 hover:no-underline"
260
+ >BLIP Image Captioning
261
+ </a>
262
+ running in the browser using
263
+ <a
264
+ href="https://github.com/huggingface/candle/"
265
+ target="_blank"
266
+ class="underline hover:text-blue-500 hover:no-underline"
267
+ >Candle</a
268
+ >, a minimalist ML framework for Rust.
269
+ </p>
270
+ <p class="text-xs max-w-lg py-2">
271
+ <b>Note:</b>
272
+ Image captioning with the smallest model takes about 50 seconds; this
273
+ will vary depending on your machine and model size.
274
+ </p>
275
+ </div>
276
+
277
+ <div>
278
+ <label for="model" class="font-medium block">Model Options: </label>
279
+ <select
280
+ id="model"
281
+ class="border-2 border-gray-500 rounded-md font-light interactive disabled:cursor-not-allowed w-full max-w-max"
282
+ ></select>
283
+ </div>
284
+ <!-- drag and drop area -->
285
+ <div class="grid gap-4 sm:grid-cols-2 py-4">
286
+ <div class="relative max-w-lg">
287
+ <div
288
+ class="absolute w-full bottom-full flex justify-between items-center"
289
+ >
290
+ <div class="flex gap-2 w-full">
291
+ <button
292
+ id="clear-btn"
293
+ disabled
294
+ title="Clear Image"
295
+ class="ml-auto text-xs bg-white rounded-md disabled:opacity-50 flex gap-1 items-center"
296
+ >
297
+ <svg
298
+ class=""
299
+ xmlns="http://www.w3.org/2000/svg"
300
+ viewBox="0 0 13 12"
301
+ height="1em"
302
+ >
303
+ <path
304
+ d="M1.6.7 12 11.1M12 .7 1.6 11.1"
305
+ stroke="#2E3036"
306
+ stroke-width="2"
307
+ />
308
+ </svg>
309
+ </button>
310
+ </div>
311
+ </div>
312
+ <div
313
+ id="drop-area"
314
+ class="flex flex-col items-center justify-center border-2 border-gray-300 border-dashed rounded-xl relative aspect-video w-full overflow-hidden"
315
+ >
316
+ <div
317
+ class="flex flex-col items-center justify-center space-y-1 text-center"
318
+ >
319
+ <svg
320
+ width="25"
321
+ height="25"
322
+ viewBox="0 0 25 25"
323
+ fill="none"
324
+ xmlns="http://www.w3.org/2000/svg"
325
+ >
326
+ <path
327
+ d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z"
328
+ fill="#000"
329
+ />
330
+ </svg>
331
+ <div class="flex text-sm text-gray-600">
332
+ <label
333
+ for="file-upload"
334
+ class="relative cursor-pointer bg-white rounded-md font-medium text-blue-950 hover:text-blue-700"
335
+ >
336
+ <span>Drag and drop your image here</span>
337
+ <span class="block text-xs">or</span>
338
+ <span class="block text-xs">Click to upload</span>
339
+ </label>
340
+ </div>
341
+ <input
342
+ id="file-upload"
343
+ name="file-upload"
344
+ type="file"
345
+ class="sr-only"
346
+ />
347
+ </div>
348
+ <canvas
349
+ id="canvas"
350
+ class="absolute pointer-events-none w-full"
351
+ ></canvas>
352
+ </div>
353
+ </div>
354
+ <div class="">
355
+ <div
356
+ class="h-full bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2"
357
+ >
358
+ <p
359
+ id="output-caption"
360
+ class="m-auto text-xl text-center p-2"
361
+ hidden
362
+ ></p>
363
+ <span id="output-status" class="m-auto font-light">
364
+ Please select an image
365
+ </span>
366
+ </div>
367
+ </div>
368
+ </div>
369
+
370
+ <div>
371
+ <div
372
+ class="flex gap-3 items-center overflow-x-scroll"
373
+ id="image-select"
374
+ >
375
+ <h3 class="font-medium">Examples:</h3>
376
+
377
+ <img
378
+ src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/sf.jpg"
379
+ class="cursor-pointer w-24 h-24 object-cover"
380
+ />
381
+ <img
382
+ src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/bike.jpeg"
383
+ class="cursor-pointer w-24 h-24 object-cover"
384
+ />
385
+ <img
386
+ src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/candle/examples/000000000077.jpg"
387
+ class="cursor-pointer w-24 h-24 object-cover"
388
+ />
389
+ </div>
390
+ </div>
391
+ </main>
392
+ </body>
393
  </html>