Oblix's picture
[Automated] Update base model metadata (#1)
0712863 verified
metadata
base_model: DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet
library_name: transformers.js
license: gpl-3.0
pipeline_tag: object-detection

This is https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet with ONNX weights, to be compatible with Transformers.js.

Usage (Transformers.js)

If you haven't already, you can install the Transformers.js JavaScript library from NPM using:

npm i @xenova/transformers

Example: Perform object-detection with Oblix/yolov8x-doclaynet_ONNX.

import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Load the model (with `quantized: false`) and its matching processor.
const model = await AutoModel.from_pretrained(
    "Oblix/yolov8x-doclaynet_ONNX",
    {
        quantized: false,
    }
);
const processor = await AutoProcessor.from_pretrained("Oblix/yolov8x-doclaynet_ONNX");

// Fetch the sample page, preprocess it and run the model on it.
const url = 'https://huggingface.co/DILHTWD/documentlayoutsegmentation_YOLOv8_ondoclaynet/resolve/main/sample1.png';
const rawImage = await RawImage.fromURL(url);
const { pixel_values } = await processor(rawImage);
const output = await model({ images: pixel_values });

// Post-process:
const permuted = output.output0[0].transpose(1, 0);
// `permuted` is a Tensor of shape [ 8400, 15 ]:
// - 8400 potential bounding boxes
// - 15 parameters for each box:
//   - first 4 are coordinates for the bounding boxes (x-center, y-center, width, height)
//   - the remaining 11 are the probabilities for each class

// Example code to format it nicely:
const results = [];
const threshold = 0.5; // Adjust the threshold as needed
// The last two dims of the processed tensor are the size the model actually saw.
const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);
for (const [xc, yc, w, h, ...scores] of permuted.tolist()) {

    // Get best class (index of the highest class probability)
    const argmax = scores.reduce((maxIndex, currentVal, currentIndex, arr) => currentVal > arr[maxIndex] ? currentIndex : maxIndex, 0);
    const score = scores[argmax];
    // Reject low-confidence candidates first, so the coordinate math below
    // only runs for boxes that are actually kept.
    if (score < threshold) continue; // Not confident enough

    // Get pixel values, taking into account the original image size:
    // convert (center, size) to corners, then rescale from the model input
    // size back to the original image size.
    const x1 = (xc - w/2) / scaledWidth * rawImage.width;
    const y1 = (yc - h/2) / scaledHeight * rawImage.height;
    const x2 = (xc + w/2) / scaledWidth * rawImage.width;
    const y2 = (yc + h/2) / scaledHeight * rawImage.height;

    const label = model.config.id2label[argmax];
    // Key order matches the sample output shown in the "Result" section.
    results.push({
        x1, y1, x2, y2, score, label, index: argmax,
    });
}

// Collapse heavily-overlapping boxes into a single detection.
const iouThreshold = 0.5; // Adjust the threshold as needed
const filteredResults = removeDuplicates(results, iouThreshold);
console.log(filteredResults);

/**
 * Collapse overlapping detections into a list of distinct boxes.
 *
 * Detections are visited in input order. A detection whose IoU with every
 * already-kept box is not above `iouThreshold` is appended. Otherwise it is
 * compared against the kept box it overlaps the most, and takes that box's
 * slot only if its own score is strictly higher; in every other case it is
 * discarded.
 *
 * @param {Iterable<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate boxes, each with corner coordinates and a `score`.
 * @param {number} iouThreshold - IoU above which two boxes count as duplicates.
 * @returns {Array} The kept detections (same object references as the input).
 */
function removeDuplicates(detections, iouThreshold) {
    const kept = [];

    for (const candidate of detections) {
        let overlapsKept = false;
        let bestSlot = -1;
        let bestIoU = 0;

        // Find the kept box that overlaps this candidate the most.
        for (const [slot, existing] of kept.entries()) {
            const overlap = calculateIoU(candidate, existing);
            if (!(overlap > iouThreshold)) continue;

            overlapsKept = true;
            if (overlap > bestIoU) {
                bestIoU = overlap;
                bestSlot = slot;
            }
        }

        if (!overlapsKept) {
            kept.push(candidate);
            continue;
        }

        // Duplicate: the higher-scoring box wins the slot.
        if (bestSlot !== -1 && candidate.score > kept[bestSlot].score) {
            kept[bestSlot] = candidate;
        }
    }

    return kept;
}

/**
 * Compute the intersection-over-union (IoU) of two axis-aligned boxes.
 *
 * Each box is described by its corners: (`x1`, `y1`) and (`x2`, `y2`).
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1 - First box.
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2 - Second box.
 * @returns {number} Intersection area divided by union area, or 0 when the
 *   union area is not positive (e.g. two zero-area boxes).
 */
function calculateIoU(detection1, detection2) {
    // Overlap along each axis, clamped to 0 when the boxes do not intersect.
    const xOverlap = Math.max(0, Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1));
    const yOverlap = Math.max(0, Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1));
    const overlapArea = xOverlap * yOverlap;

    const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
    const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
    const unionArea = area1 + area2 - overlapArea;

    // Guard against 0 / 0 = NaN for degenerate boxes; treat them as not overlapping.
    return unionArea > 0 ? overlapArea / unionArea : 0;
}

Result

[
    {
        "x1": 54.53195288479328,
        "y1": 170.06781649589539,
        "x2": 95.52642979323865,
        "y2": 186.62115139961244,
        "score": 0.8901662826538086,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.96503926515579,
        "y1": 195.67131299972536,
        "x2": 221.8717828631401,
        "y2": 212.6188931465149,
        "score": 0.8967247605323792,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 54.53195288479328,
        "y1": 221.1506155014038,
        "x2": 98.4759178608656,
        "y2": 238.44384784698488,
        "score": 0.8795284032821655,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.731045877933504,
        "y1": 338.1506155014038,
        "x2": 103.58089088201523,
        "y2": 355.22782917022704,
        "score": 0.9104153513908386,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.501348263025285,
        "y1": 452.59601612091063,
        "x2": 144.76493505835532,
        "y2": 469.1547849655152,
        "score": 0.9181555509567261,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.37510642111301,
        "y1": 568.1918724060059,
        "x2": 73.67877252995967,
        "y2": 584.1619010925293,
        "score": 0.899300754070282,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 54.27563991844654,
        "y1": 840.2569072723389,
        "x2": 70.35437833964825,
        "y2": 859.4512378692626,
        "score": 0.6805046796798706,
        "label": "Section-header",
        "index": 7
    },
    {
        "x1": 309.2861147403717,
        "y1": 908.7717830657958,
        "x2": 373.8879840373993,
        "y2": 922.6841892242431,
        "score": 0.8969672918319702,
        "label": "Page-footer",
        "index": 4
    },
    {
        "x1": 311.53335428237915,
        "y1": 10.31740515232086,
        "x2": 607.2475433349609,
        "y2": 33.85392036437988,
        "score": 0.9498511552810669,
        "label": "Page-header",
        "index": 5
    },
    {
        "x1": 56.66784882545471,
        "y1": 289.38916368484496,
        "x2": 416.7734823703766,
        "y2": 306.94164075851444,
        "score": 0.856067419052124,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 56.03344459533691,
        "y1": 309.5055012702942,
        "x2": 317.7232768535614,
        "y2": 325.49175367355343,
        "score": 0.8314194083213806,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.00637502670288,
        "y1": 429.9619674682617,
        "x2": 414.61163306236267,
        "y2": 445.95904312133786,
        "score": 0.8927980661392212,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.619012689590456,
        "y1": 638.6609138488769,
        "x2": 384.32462439537045,
        "y2": 656.8182655334473,
        "score": 0.9029342532157898,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 58.06927928924561,
        "y1": 794.932172012329,
        "x2": 520.523375415802,
        "y2": 811.1884700775146,
        "score": 0.9037705063819885,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 54.25830144882202,
        "y1": 76.01902542114259,
        "x2": 552.8331304550171,
        "y2": 158.67227897644042,
        "score": 0.9725438356399536,
        "label": "Title",
        "index": 10
    },
    {
        "x1": 53.636448097229,
        "y1": 244.93504171371458,
        "x2": 610.1452471733094,
        "y2": 274.8768593788147,
        "score": 0.8954038619995117,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 54.76330833435059,
        "y1": 364.74734601974484,
        "x2": 625.0439935684204,
        "y2": 405.74994478225705,
        "score": 0.7930819988250732,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 55.78299608230591,
        "y1": 480.10940895080563,
        "x2": 623.4623931884765,
        "y2": 556.692225265503,
        "score": 0.9482676982879639,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 52.160629177093504,
        "y1": 593.5841983795166,
        "x2": 609.7405840873719,
        "y2": 635.7749668121338,
        "score": 0.9440742135047913,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 53.12467575073242,
        "y1": 654.1885282516479,
        "x2": 615.2034725189209,
        "y2": 697.286619758606,
        "score": 0.9134702086448669,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 52.52786092758179,
        "y1": 712.9350305557251,
        "x2": 622.7321027755737,
        "y2": 754.2832815170287,
        "score": 0.9259238243103027,
        "label": "Text",
        "index": 9
    },
    {
        "x1": 56.837522792816166,
        "y1": 758.6981185913086,
        "x2": 607.179635810852,
        "y2": 787.9486541748047,
        "score": 0.9015638828277588,
        "label": "List-item",
        "index": 3
    },
    {
        "x1": 56.57186779975891,
        "y1": 810.8556049346925,
        "x2": 446.48612236976624,
        "y2": 828.0084697723388,
        "score": 0.8806689977645874,
        "label": "List-item",
        "index": 3
    }
]

image/png

Labels

  • Caption
  • Footnote
  • Formula
  • List-item
  • Page-footer
  • Page-header
  • Picture
  • Section-header
  • Table
  • Text
  • Title