Spaces:

jordyvl
/

viz_bdpc

Runtime error

App Files Files Community

jordyvl commited on Oct 30, 2023

Commit

6720717

•

1 Parent(s): eab9f1c

Create app.py

Browse files

still to adapt with sliders, base functions added

Files changed (1) hide show

app.py +164 -0

app.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import os
+from pathlib import Path
+import pandas as pd
+import gradio as gr
+from collections import OrderedDict
+from PIL import Image, ImageDraw, ImageFont
+from io import BytesIO
+import PyPDF2
+import pdf2image
+# Module-level limits for PDF handling.
+# NOTE(review): presumably a cap on the number of pages processed per PDF -- confirm.
+MAX_PAGES = 50
+# 100,000,000 bytes, i.e. 100 MB decimal (~95 MiB).
+# NOTE(review): presumably an upper bound on accepted PDF file size -- confirm.
+MAX_PDF_SIZE = 100000000  # almost 100MB
+# pdf_to_grid skips an embedded image only when it is smaller than BOTH of
+# these bounds (width below MIN_WIDTH and height below MIN_HEIGHT).
+MIN_WIDTH, MIN_HEIGHT = 150, 150
+def equal_image_grid(images):
+    def compute_grid(n, max_cols=6):
+        equalDivisor = int(n**0.5)
+        cols = min(equalDivisor, max_cols)
+        rows = equalDivisor
+        if rows * cols >= n:
+            return rows, cols
+        cols += 1
+        if rows * cols >= n:
+            return rows, cols
+        while rows * cols < n:
+            rows += 1
+        return rows, cols
+    # assert len(images) == rows*cols
+    rows, cols = compute_grid(len(images))
+    # rescaling to min width [height padding]
+    images = [im for im in images if (im.height > 0) and (im.width > 0)]  # could be NA
+    min_width = min(im.width for im in images)
+    images = [im.resize((min_width, int(im.height * min_width / im.width)), resample=Image.BICUBIC) for im in images]
+    w, h = max([img.size[0] for img in images]), max([img.size[1] for img in images])
+    grid = Image.new("RGB", size=(cols * w, rows * h))
+    grid_w, grid_h = grid.size
+    for i, img in enumerate(images):
+        grid.paste(img, box=(i % cols * w, i // cols * h))
+    return grid
+def add_pagenumbers(im_list, height_scale=40):
+    def add_pagenumber(image, i):
+        width, height = image.size
+        draw = ImageDraw.Draw(image)
+        fontsize = int((width * height) ** (0.5) / height_scale)
+        font = ImageFont.truetype("Arial.ttf", fontsize)
+        margin = int(2 * fontsize)
+        draw.text(
+            (width - margin, height - margin),
+            str(i + 1),
+            fill="#D00917",
+            font=font,
+            spacing=4,
+            align="right",
+        )
+    for i, image in enumerate(im_list):
+        add_pagenumber(image, i)
+def pdf_to_grid(pdf_path):
+    reader = PyPDF2.PdfReader(pdf_path)
+    reached_page_limit = False
+    images = []
+    try:
+        for p, page in enumerate(reader.pages):
+            if reached_page_limit:
+                break
+            for image in page.images:
+                im = Image.open(BytesIO(image.data))
+                if im.width < MIN_WIDTH and im.height < MIN_HEIGHT:
+                    continue
+                images.append(im)
+    except Exception as e:
+        print(f"{pdf_path} PyPDF get_images {e}")
+        images = pdf2image.convert_from_path(pdf_path)
+    # simpler but slower
+    # images = pdf2image.convert_from_path(pdf_path)
+    if len(images) == 0:
+        return None
+    add_pagenumbers(images)
+    return equal_image_grid(images)
+def main(complexity, evidence, form, operation, type):
+    # need to write a query on diagnostic test and sample from it based on slider values
+    # then return the sample
+    query = " and ".join(
+        [
+            f"{cat}_{val} == {True}"
+            for cat, val in zip(meta_cats.keys(), [complexity, evidence, form, operation, type])
+            if val
+        ]
+    )
+    results = DIAGNOSTIC_TEST.query(query)
+    if len(results) == 0:
+        return f"No results found for query {query}", "", "", "", ""
+    for i, sample in results.sample(frac=1).iterrows():
+        if not sample['nhash']:
+            continue
+        print("Sampled: ", sample["nhash"])
+        # first get PDF file
+        PDF, grid = None, None
+        pdf_path = PDF_PATH / "test" / (sample["nhash"] + ".pdf")
+        if not os.path.exists(pdf_path):
+            continue
+        PDF = pdf_path
+        grid = pdf_to_grid(pdf_path)
+        if not grid:
+            continue
+        question, answer = sample["label"] #might need to translate
+        return label, grid, PDF
+# Document class names; used below as the choices of the "label" dropdown
+# (through meta_cats).
+# NOTE(review): appears to be the 16 RVL-CDIP categories -- confirm that the
+# order matches the label ids used by the dataset.
+_CLASSES = [
+    "letter",
+    "form",
+    "email",
+    "handwritten",
+    "advertisement",
+    "scientific report",
+    "scientific publication",
+    "specification",
+    "file folder",
+    "news article",
+    "budget",
+    "invoice",
+    "presentation",
+    "questionnaire",
+    "resume",
+    "memo",
+]
+# test
+# l, im, f = main(*slider_defaults)
+#load both datasets in memory? --> easier retrieval
+meta_cats = {'dataset': ['rvl_cdip', 'rvl_cdip_N'],
+             'label': _CLASSES
+            }
+sliders = [gr.Dropdown(choices=choices, value=choices[-1], label=label) for label, choices in meta_cats.items()]
+slider_defaults = [None, "visual_checkbox", None, None, None]  # [slider.value for slider in sliders]
+outputs = [
+    gr.Textbox(label="label"),
+    gr.Image(label="image grid of PDF"),
+    gr.File(label="PDF"),
+]
+iface = gr.Interface(fn=main, inputs=sliders, outputs=outputs, description="Visualize PDF samples from multi-page (PDF) document classification datasets", title='Beyond Document Page Classification: Examples')
+iface.launch(share=True)