|
import gradio as gr |
|
from transformers import TrOCRProcessor, VisionEncoderDecoderModel |
|
import requests |
|
from PIL import Image |
|
|
|
# Download the word-level tokenizer definition published alongside the model
# and store it in the working directory, where it is loaded from further down.
url = 'https://huggingface.co/yhshin/latex-ocr/raw/main/tokenizer-wordlevel.json'
# A timeout keeps start-up from hanging forever if the host never answers.
r = requests.get(url, timeout=30)
# Fail fast on an HTTP error rather than saving an error page as the tokenizer.
r.raise_for_status()
# The context manager guarantees the file is flushed and closed before it is
# read back, instead of relying on garbage collection of the file object.
with open('tokenizer-wordlevel.json', 'wb') as tokenizer_file:
    tokenizer_file.write(r.content)
|
|
|
|
|
# Identifiers of the pretrained artefacts used by the demo.
_PROCESSOR_CHECKPOINT = "microsoft/trocr-small-printed"
_MODEL_CHECKPOINT = "yhshin/latex-ocr"
_TOKENIZER_FILE = "tokenizer-wordlevel.json"

# `processor` turns an input image into the pixel tensor fed to the model;
# `model` is the encoder-decoder network that generates the token ids.
processor = TrOCRProcessor.from_pretrained(_PROCESSOR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_MODEL_CHECKPOINT)

from tokenizers import Tokenizer

# Generated ids are decoded with this tokenizer, loaded from the JSON file
# that was downloaded earlier in the script.
tokenizer = Tokenizer.from_file(_TOKENIZER_FILE)
|
|
|
|
|
|
|
def process_image(image) -> str:
    """Transcribe an image of an equation into LaTeX source.

    Args:
        image: The picture to transcribe, in a form accepted by the
            module-level ``processor`` (the interface is configured to pass
            a PIL image). ``None`` is tolerated.

    Returns:
        The decoded text of the first (and only) generated sequence with
        every space removed, or an empty string when ``image`` is ``None``.
    """
    # The interface can be submitted without an upload, in which case there
    # is nothing to transcribe; return empty output instead of failing
    # inside the processor.
    if image is None:
        return ""

    # Convert the image into the pixel tensor consumed by the model.
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate token ids with the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # Decode the batch of ids and keep the single sequence it contains.
    generated_text = tokenizer.decode_batch(
        generated_ids.tolist(), skip_special_tokens=True
    )[0]

    # Strip every space from the decoded text. Note that this also removes
    # any space that was part of the LaTeX itself.
    return generated_text.replace(" ", "")
|
|
|
|
|
|
|
|
|
# Heading passed to the interface as its title.
title = "Convert image to LaTeX source code"

# Long-form text passed to the interface as its article. The encoding is given
# explicitly so the result does not depend on the host's default locale.
# NOTE(review): assumes article.md is stored as UTF-8 — confirm.
with open('article.md', mode='r', encoding='utf-8') as file:
    article = file.read()
|
|
|
# Introductory text passed to the interface as its description. Each literal
# below is one line of the text; the bare "\n" entries are the blank lines.
description = (
    "\n"
    "This is a demo of machine learning model trained to reconstruct the LaTeX source code of an equation from an image.\n"
    "To use it, simply upload an image or use one of the example images below and click 'submit'.\n"
    "Results will show up in a few seconds.\n"
    "\n"
    "Try rendering the generated LaTeX [here](https://quicklatex.com/) to compare with the original.\n"
    "(The model is not perfect yet, so you may need to edit the resulting LaTeX a bit to get it to render a good match.)\n"
    "\n"
)
|
|
|
# Sample equation images offered in the demo.
_EXAMPLE_IMAGES = (
    "examples/1d32874f02.png",
    "examples/1e466b180d.png",
    "examples/2d3503f427.png",
    "examples/2f9d3c4e43.png",
    "examples/51c5cc2ff5.png",
    "examples/545a492388.png",
    "examples/6a51a30502.png",
    "examples/6bf6832adb.png",
    "examples/7afdeff0e6.png",
    "examples/b8f1e64b1f.png",
)

# One single-element row per example, matching the interface's single image
# input.
examples = [[image_path] for image_path in _EXAMPLE_IMAGES]
|
|
|
|
|
# Widgets for the demo: a PIL image goes in, the LaTeX text comes out.
# NOTE(review): `gr.inputs` / `gr.outputs` appear to be the legacy Gradio
# namespaces — confirm the pinned Gradio version still provides them before
# upgrading the dependency.
image_input = gr.inputs.Image(type="pil")
latex_output = gr.outputs.Textbox()

# Wire the transcription function to the widgets and page text, then start
# serving the app.
iface = gr.Interface(
    fn=process_image,
    inputs=[image_input],
    outputs=latex_output,
    title=title,
    description=description,
    article=article,
    examples=examples,
)
iface.launch()
|
|
|
|