Spaces:

suredream
/

segment_ui

Running

App Files Files Community

Jun Xiong committed on Jul 19

Commit

1bd7ddc

•

1 Parent(s): a7df02b

web

Browse files

Files changed (6) hide show

README.md +6 -6
index.css +116 -0
index.html +24 -13
index.js +268 -52
style.css +18 -66
worker.js +109 -0

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
-title: Segment Ui
-emoji: 🌐
-colorFrom: blue
-colorTo: yellow
 sdk: static
 pinned: false
 models:
-- Xenova/detr-resnet-50
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Segment Anything Web
+emoji: 💻
+colorFrom: green
+colorTo: blue
 sdk: static
 pinned: false
 models:
+ - Xenova/slimsam-77-uniform
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

index.css ADDED Viewed

	@@ -0,0 +1,116 @@

+* {
+    box-sizing: border-box;
+    padding: 0;
+    margin: 0;
+    font-family: sans-serif;
+}
+html,
+body {
+    height: 100%;
+}
+body {
+    padding: 16px 32px;
+}
+body,
+#container,
+#upload-button {
+    display: flex;
+    flex-direction: column;
+    justify-content: center;
+    align-items: center;
+}
+h1 {
+    text-align: center;
+}
+#container {
+    position: relative;
+    width: 640px;
+    height: 420px;
+    max-width: 100%;
+    max-height: 100%;
+    border: 2px dashed #D1D5DB;
+    border-radius: 0.75rem;
+    overflow: hidden;
+    cursor: pointer;
+    margin-top: 1rem;
+    background-size: 100% 100%;
+    background-position: center;
+    background-repeat: no-repeat;
+}
+#mask-output {
+    position: absolute;
+    width: 100%;
+    height: 100%;
+    pointer-events: none;
+}
+#upload-button {
+    gap: 0.4rem;
+    font-size: 18px;
+    cursor: pointer;
+}
+#upload {
+    display: none;
+}
+svg {
+    pointer-events: none;
+}
+#example {
+    font-size: 14px;
+    text-decoration: underline;
+    cursor: pointer;
+}
+#example:hover {
+    color: #2563EB;
+}
+canvas {
+    position: absolute;
+    width: 100%;
+    height: 100%;
+    opacity: 0.6;
+}
+#status {
+    min-height: 16px;
+    margin: 8px 0;
+}
+.icon {
+    height: 16px;
+    width: 16px;
+    position: absolute;
+    transform: translate(-50%, -50%);
+}
+#controls>button {
+    padding: 6px 12px;
+    background-color: #3498db;
+    color: white;
+    border: 1px solid #2980b9;
+    border-radius: 5px;
+    cursor: pointer;
+    font-size: 16px;
+}
+#controls>button:disabled {
+    background-color: #d1d5db;
+    color: #6b7280;
+    border: 1px solid #9ca3af;
+    cursor: not-allowed;
+}
+#information {
+    margin-top: 0.25rem;
+    font-size: 15px;
+}

index.html CHANGED Viewed

@@ -3,24 +3,35 @@
 <head>
     <meta charset="UTF-8" />
-    <link rel="stylesheet" href="style.css" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Transformers.js - Object Detection</title>
 </head>
 <body>
-    <h1>Object Detection w/ 🤗 Transformers.js</h1>
-    <label id="container" for="upload">
-        <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
-            <path fill="#000"
-                d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
-            </path>
-        </svg>
-        Click to upload image
-        <label id="example">(or try example)</label>
-    </label>
-    <label id="status">Loading model...</label>
     <input id="upload" type="file" accept="image/*" />
     <script src="index.js" type="module"></script>

 <head>
     <meta charset="UTF-8" />
+    <link rel="stylesheet" href="index.css" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>Transformers.js - Segment Anything</title>
 </head>
 <body>
+    <h1>Segment Anything w/ 🤗 Transformers.js</h1>
+    <div id="container">
+        <label id="upload-button" for="upload">
+            <svg width="25" height="25" viewBox="0 0 25 25" fill="none" xmlns="http://www.w3.org/2000/svg">
+                <path fill="#000"
+                    d="M3.5 24.3a3 3 0 0 1-1.9-.8c-.5-.5-.8-1.2-.8-1.9V2.9c0-.7.3-1.3.8-1.9.6-.5 1.2-.7 2-.7h18.6c.7 0 1.3.2 1.9.7.5.6.7 1.2.7 2v18.6c0 .7-.2 1.4-.7 1.9a3 3 0 0 1-2 .8H3.6Zm0-2.7h18.7V2.9H3.5v18.7Zm2.7-2.7h13.3c.3 0 .5 0 .6-.3v-.7l-3.7-5a.6.6 0 0 0-.6-.2c-.2 0-.4 0-.5.3l-3.5 4.6-2.4-3.3a.6.6 0 0 0-.6-.3c-.2 0-.4.1-.5.3l-2.7 3.6c-.1.2-.2.4 0 .7.1.2.3.3.6.3Z">
+                </path>
+            </svg>
+            Click to upload image
+            <label id="example">(or try example)</label>
+        </label>
+        <canvas id="mask-output"></canvas>
+    </div>
+    <label id="status"></label>
+    <div id="controls">
+        <button id="reset-image">Reset image</button>
+        <button id="clear-points">Clear points</button>
+        <button id="cut-mask" disabled>Cut mask</button>
+    </div>
+    <p id="information">
+        Left click = positive points, right click = negative points.
+    </p>
     <input id="upload" type="file" accept="image/*" />
     <script src="index.js" type="module"></script>

index.js CHANGED Viewed

@@ -1,26 +1,165 @@
-import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
-// Since we will download the model from the Hugging Face Hub, we can skip the local model check
-env.allowLocalModels = false;
-// Reference the elements that we will need
-const status = document.getElementById('status');
 const fileUpload = document.getElementById('upload');
 const imageContainer = document.getElementById('container');
 const example = document.getElementById('example');
-const EXAMPLE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/city-streets.jpg';
-// Create a new object detection pipeline
-status.textContent = 'Loading model...';
-const detector = await pipeline('object-detection', 'Xenova/detr-resnet-50');
-status.textContent = 'Ready';
-example.addEventListener('click', (e) => {
-    e.preventDefault();
-    detect(EXAMPLE_URL);
 });
 fileUpload.addEventListener('change', function (e) {
     const file = e.target.files[0];
     if (!file) {
@@ -30,50 +169,127 @@ fileUpload.addEventListener('change', function (e) {
     const reader = new FileReader();
     // Set up a callback when the file is loaded
-    reader.onload = e2 => detect(e2.target.result);
     reader.readAsDataURL(file);
 });
-// Detect objects in the image
-async function detect(img) {
-    imageContainer.innerHTML = '';
-    imageContainer.style.backgroundImage = `url(${img})`;
-    status.textContent = 'Analysing...';
-    const output = await detector(img, {
-        threshold: 0.5,
-        percentage: true,
-    });
-    status.textContent = '';
-    output.forEach(renderBox);
 }
-// Render a bounding box and label on the image
-function renderBox({ box, label }) {
-    const { xmax, xmin, ymax, ymin } = box;
-    // Generate a random color for the box
-    const color = '#' + Math.floor(Math.random() * 0xFFFFFF).toString(16).padStart(6, 0);
-    // Draw the box
-    const boxElement = document.createElement('div');
-    boxElement.className = 'bounding-box';
-    Object.assign(boxElement.style, {
-        borderColor: color,
-        left: 100 * xmin + '%',
-        top: 100 * ymin + '%',
-        width: 100 * (xmax - xmin) + '%',
-        height: 100 * (ymax - ymin) + '%',
-    })
-    // Draw label
-    const labelElement = document.createElement('span');
-    labelElement.textContent = label;
-    labelElement.className = 'bounding-box-label';
-    labelElement.style.backgroundColor = color;
-    boxElement.appendChild(labelElement);
-    imageContainer.appendChild(boxElement);
 }

+// Reference the elements we will use
+const statusLabel = document.getElementById('status');
 const fileUpload = document.getElementById('upload');
 const imageContainer = document.getElementById('container');
 const example = document.getElementById('example');
+const maskCanvas = document.getElementById('mask-output');
+const uploadButton = document.getElementById('upload-button');
+const resetButton = document.getElementById('reset-image');
+const clearButton = document.getElementById('clear-points');
+const cutButton = document.getElementById('cut-mask');
+// State variables
+let lastPoints = null;
+let isEncoded = false;
+let isDecoding = false;
+let isMultiMaskMode = false;
+let modelReady = false;
+let imageDataURI = null;
+// Constants
+const BASE_URL = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/';
+const EXAMPLE_URL = BASE_URL + 'corgi.jpg';
+// Create a web worker so that the main (UI) thread is not blocked during inference.
+const worker = new Worker('worker.js', {
+    type: 'module',
+});
+// Preload star and cross images to avoid lag on first click
+const star = new Image();
+star.src = BASE_URL + 'star-icon.png';
+star.className = 'icon';
+const cross = new Image();
+cross.src = BASE_URL + 'cross-icon.png';
+cross.className = 'icon';
+// Set up message handler
+worker.addEventListener('message', (e) => {
+    const { type, data } = e.data;
+    if (type === 'ready') {
+        modelReady = true;
+        statusLabel.textContent = 'Ready';
+    } else if (type === 'decode_result') {
+        isDecoding = false;
+        if (!isEncoded) {
+            return; // We are not ready to decode yet
+        }
+        if (!isMultiMaskMode && lastPoints) {
+            // Perform decoding with the last point
+            decode();
+            lastPoints = null;
+        }
+        const { mask, scores } = data;
+        // Update canvas dimensions (if different)
+        if (maskCanvas.width !== mask.width || maskCanvas.height !== mask.height) {
+            maskCanvas.width = mask.width;
+            maskCanvas.height = mask.height;
+        }
+        // Create context and allocate buffer for pixel data
+        const context = maskCanvas.getContext('2d');
+        const imageData = context.createImageData(maskCanvas.width, maskCanvas.height);
+        // Select best mask
+        const numMasks = scores.length; // 3
+        let bestIndex = 0;
+        for (let i = 1; i < numMasks; ++i) {
+            if (scores[i] > scores[bestIndex]) {
+                bestIndex = i;
+            }
+        }
+        statusLabel.textContent = `Segment score: ${scores[bestIndex].toFixed(2)}`;
+        // Fill mask with colour
+        const pixelData = imageData.data;
+        for (let i = 0; i < pixelData.length; ++i) {
+            if (mask.data[numMasks * i + bestIndex] === 1) {
+                const offset = 4 * i;
+                pixelData[offset] = 0;       // red
+                pixelData[offset + 1] = 114; // green
+                pixelData[offset + 2] = 189; // blue
+                pixelData[offset + 3] = 255; // alpha
+            }
+        }
+        // Draw image data to context
+        context.putImageData(imageData, 0, 0);
+    } else if (type === 'segment_result') {
+        if (data === 'start') {
+            statusLabel.textContent = 'Extracting image embedding...';
+        } else {
+            statusLabel.textContent = 'Embedding extracted!';
+            isEncoded = true;
+        }
+    }
 });
+function decode() {
+    isDecoding = true;
+    worker.postMessage({ type: 'decode', data: lastPoints });
+}
+function clearPointsAndMask() {
+    // Reset state
+    isMultiMaskMode = false;
+    lastPoints = null;
+    // Remove points from previous mask (if any)
+    document.querySelectorAll('.icon').forEach(e => e.remove());
+    // Disable cut button
+    cutButton.disabled = true;
+    // Reset mask canvas
+    maskCanvas.getContext('2d').clearRect(0, 0, maskCanvas.width, maskCanvas.height);
+}
+clearButton.addEventListener('click', clearPointsAndMask);
+resetButton.addEventListener('click', () => {
+    // Update state
+    isEncoded = false;
+    imageDataURI = null;
+    // Indicate to worker that we have reset the state
+    worker.postMessage({ type: 'reset' });
+    // Clear points and mask (if present)
+    clearPointsAndMask();
+    // Update UI
+    cutButton.disabled = true;
+    imageContainer.style.backgroundImage = 'none';
+    uploadButton.style.display = 'flex';
+    statusLabel.textContent = 'Ready';
+});
+function segment(data) {
+    // Update state
+    isEncoded = false;
+    if (!modelReady) {
+        statusLabel.textContent = 'Loading model...';
+    }
+    imageDataURI = data;
+    // Update UI
+    imageContainer.style.backgroundImage = `url(${data})`;
+    uploadButton.style.display = 'none';
+    cutButton.disabled = true;
+    // Instruct worker to segment the image
+    worker.postMessage({ type: 'segment', data });
+}
+// Handle file selection
 fileUpload.addEventListener('change', function (e) {
     const file = e.target.files[0];
     if (!file) {
     const reader = new FileReader();
     // Set up a callback when the file is loaded
+    reader.onload = e2 => segment(e2.target.result);
     reader.readAsDataURL(file);
 });
+example.addEventListener('click', (e) => {
+    e.preventDefault();
+    segment(EXAMPLE_URL);
+});
+function addIcon({ point, label }) {
+    const icon = (label === 1 ? star : cross).cloneNode();
+    icon.style.left = `${point[0] * 100}%`;
+    icon.style.top = `${point[1] * 100}%`;
+    imageContainer.appendChild(icon);
+}
+// Handle clicks on the image container: each left/right click adds a prompt point and re-decodes
+imageContainer.addEventListener('mousedown', e => {
+    if (e.button !== 0 && e.button !== 2) {
+        return; // Ignore other buttons
+    }
+    if (!isEncoded) {
+        return; // Ignore if not encoded yet
+    }
+    if (!isMultiMaskMode) {
+        lastPoints = []; // First click: drop any hover point and start a fresh list of prompts
+        isMultiMaskMode = true;
+        cutButton.disabled = false;
+    }
+    const point = getPoint(e);
+    lastPoints.push(point);
+    // Show a star (positive) or cross (negative) icon at the clicked position
+    addIcon(point);
+    decode();
+});
+// Clamp x to the closed range [min, max]; with the defaults this is [0, 1]
+function clamp(x, min = 0, max = 1) {
+    return Math.max(Math.min(x, max), min)
 }
+function getPoint(e) {
+    // Get the bounding box of the image container
+    const bb = imageContainer.getBoundingClientRect();
+    // Mouse position as a fraction of the container's width/height, clamped to [0, 1]
+    const mouseX = clamp((e.clientX - bb.left) / bb.width);
+    const mouseY = clamp((e.clientY - bb.top) / bb.height);
+    return {
+        point: [mouseX, mouseY],
+        label: e.button === 2 // right click
+            ? 0  // negative prompt
+            : 1, // positive prompt (any other button)
+    }
 }
+// Do not show context menu on right click
+imageContainer.addEventListener('contextmenu', e => {
+    e.preventDefault();
+});
+// On hover, preview the mask for the point under the cursor
+imageContainer.addEventListener('mousemove', e => {
+    if (!isEncoded || isMultiMaskMode) {
+        // Ignore mousemove events if the image is not encoded yet,
+        // or we are in multi-mask mode
+        return;
+    }
+    lastPoints = [getPoint(e)]; // Always keep the latest point; the message handler decodes it once the worker is free
+    if (!isDecoding) {
+        decode(); // Only decode if we are not already decoding
+    }
+});
+// Handle cut button click
+cutButton.addEventListener('click', () => {
+    const [w, h] = [maskCanvas.width, maskCanvas.height];
+    // Get the mask pixel data
+    const maskContext = maskCanvas.getContext('2d');
+    const maskPixelData = maskContext.getImageData(0, 0, w, h);
+    // Load the image
+    const image = new Image();
+    image.crossOrigin = 'anonymous';
+    image.onload = async () => {
+        // Create a new canvas to hold the image
+        const imageCanvas = new OffscreenCanvas(w, h);
+        const imageContext = imageCanvas.getContext('2d');
+        imageContext.drawImage(image, 0, 0, w, h);
+        const imagePixelData = imageContext.getImageData(0, 0, w, h);
+        // Create a new canvas to hold the cut-out
+        const cutCanvas = new OffscreenCanvas(w, h);
+        const cutContext = cutCanvas.getContext('2d');
+        const cutPixelData = cutContext.getImageData(0, 0, w, h);
+        // Copy the image pixel data to the cut canvas
+        for (let i = 3; i < maskPixelData.data.length; i += 4) {
+            if (maskPixelData.data[i] > 0) {
+                for (let j = 0; j < 4; ++j) {
+                    const offset = i - j;
+                    cutPixelData.data[offset] = imagePixelData.data[offset];
+                }
+            }
+        }
+        cutContext.putImageData(cutPixelData, 0, 0);
+        // Download image
+        const link = document.createElement('a');
+        link.download = 'image.png';
+        link.href = URL.createObjectURL(await cutCanvas.convertToBlob());
+        link.click();
+        link.remove();
+    }
+    image.src = imageDataURI;
+});

style.css CHANGED Viewed

@@ -1,76 +1,28 @@
-* {
-    box-sizing: border-box;
-    padding: 0;
-    margin: 0;
-    font-family: sans-serif;
-}
-html,
-body {
-    height: 100%;
-}
 body {
-    padding: 32px;
 }
-body,
-#container {
-    display: flex;
-    flex-direction: column;
-    justify-content: center;
-    align-items: center;
 }
-#container {
-    position: relative;
-    gap: 0.4rem;
-    width: 640px;
-    height: 640px;
-    max-width: 100%;
-    max-height: 100%;
-    border: 2px dashed #D1D5DB;
-    border-radius: 0.75rem;
-    overflow: hidden;
-    cursor: pointer;
-    margin: 1rem;
-    background-size: 100% 100%;
-    background-position: center;
-    background-repeat: no-repeat;
-    font-size: 18px;
 }
-#upload {
-    display: none;
 }
-svg {
-    pointer-events: none;
 }
-#example {
-    font-size: 14px;
-    text-decoration: underline;
-    cursor: pointer;
-}
-#example:hover {
-    color: #2563EB;
-}
-.bounding-box {
-    position: absolute;
-    box-sizing: border-box;
-    border: solid 2px;
-}
-.bounding-box-label {
-    color: white;
-    position: absolute;
-    font-size: 12px;
-    margin: -16px 0 0 -2px;
-    padding: 1px;
-}

 body {
+	padding: 2rem;
+	font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
 }
+h1 {
+	font-size: 16px;
+	margin-top: 0;
 }
+p {
+	color: rgb(107, 114, 128);
+	font-size: 15px;
+	margin-bottom: 10px;
+	margin-top: 5px;
 }
+.card {
+	max-width: 620px;
+	margin: 0 auto;
+	padding: 16px;
+	border: 1px solid lightgray;
+	border-radius: 16px;
 }
+.card p:last-child {
+	margin-bottom: 0;
 }

worker.js ADDED Viewed

	@@ -0,0 +1,109 @@

+import { env, SamModel, AutoProcessor, RawImage, Tensor } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
+// Since we will download the model from the Hugging Face Hub, we can skip the local model check
+env.allowLocalModels = false;
+// We adopt the singleton pattern to enable lazy-loading of the model and processor.
+export class SegmentAnythingSingleton {
+    static model_id = 'Xenova/slimsam-77-uniform';
+    static model;
+    static processor;
+    static quantized = true;
+    static getInstance() {
+        if (!this.model) {
+            this.model = SamModel.from_pretrained(this.model_id, {
+                quantized: this.quantized,
+            });
+        }
+        if (!this.processor) {
+            this.processor = AutoProcessor.from_pretrained(this.model_id);
+        }
+        return Promise.all([this.model, this.processor]);
+    }
+}
+// State variables
+let image_embeddings = null;
+let image_inputs = null;
+let ready = false;
+self.onmessage = async (e) => {
+    const [model, processor] = await SegmentAnythingSingleton.getInstance();
+    if (!ready) {
+        // Indicate that we are ready to accept requests
+        ready = true;
+        self.postMessage({
+            type: 'ready',
+        });
+    }
+    const { type, data } = e.data;
+    if (type === 'reset') {
+        image_inputs = null;
+        image_embeddings = null;
+    } else if (type === 'segment') {
+        // Indicate that we are starting to segment the image
+        self.postMessage({
+            type: 'segment_result',
+            data: 'start',
+        });
+        // Read the image and recompute image embeddings
+        const image = await RawImage.read(e.data.data);
+        image_inputs = await processor(image);
+        image_embeddings = await model.get_image_embeddings(image_inputs)
+        // Indicate that we have computed the image embeddings, and we are ready to accept decoding requests
+        self.postMessage({
+            type: 'segment_result',
+            data: 'done',
+        });
+    } else if (type === 'decode') {
+        // Prepare inputs for decoding
+        const reshaped = image_inputs.reshaped_input_sizes[0];
+        const points = data.map(x => [x.point[0] * reshaped[1], x.point[1] * reshaped[0]])
+        const labels = data.map(x => BigInt(x.label));
+        const input_points = new Tensor(
+            'float32',
+            points.flat(Infinity),
+            [1, 1, points.length, 2],
+        )
+        const input_labels = new Tensor(
+            'int64',
+            labels.flat(Infinity),
+            [1, 1, labels.length],
+        )
+        // Generate the mask
+        const outputs = await model({
+            ...image_embeddings,
+            input_points,
+            input_labels,
+        })
+        // Post-process the mask
+        const masks = await processor.post_process_masks(
+            outputs.pred_masks,
+            image_inputs.original_sizes,
+            image_inputs.reshaped_input_sizes,
+        );
+        // Send the result back to the main thread
+        self.postMessage({
+            type: 'decode_result',
+            data: {
+                mask: RawImage.fromTensor(masks[0][0]),
+                scores: outputs.iou_scores.data,
+            },
+        });
+    } else {
+        throw new Error(`Unknown message type: ${type}`);
+    }
+}