import requests
import gradio as gr
from PIL import Image
from tokenizers import Tokenizer
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# Download the word-level tokenizer file that accompanies the model checkpoint
url = "https://huggingface.co/yhshin/latex-ocr/raw/main/tokenizer-wordlevel.json"
r = requests.get(url)
with open("tokenizer-wordlevel.json", "wb") as f:
    f.write(r.content)

# Load the image processor, the fine-tuned encoder-decoder model, and the tokenizer
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-small-printed")
model = VisionEncoderDecoderModel.from_pretrained("yhshin/latex-ocr")
tokenizer = Tokenizer.from_file("tokenizer-wordlevel.json")


def process_image(image):
    # prepare image (ensure 3-channel input for the image processor)
    image = image.convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values

    # generate (no beam search)
    generated_ids = model.generate(pixel_values)

    # decode
    generated_text = tokenizer.decode_batch(generated_ids.tolist(), skip_special_tokens=True)[0]

    # strip the spaces introduced by the word-level tokenizer
    generated_text = generated_text.replace(" ", "")

    return generated_text


title = "Interactive demo: latex-ocr"
description = (
    "Demo for latex-ocr, a machine learning model that parses an image of an equation "
    "and attempts to recover the LaTeX source code that generated it. To use it, simply "
    "upload an image or use one of the example images below and click 'Submit'. "
    "Results will show up in a few seconds."
)
article = "Made by Young Ho Shin: Github | Github Repo"  # original anchor hrefs not preserved in this copy

# example images shown under the input widget
examples = [["image_0.png"], ["image_1.png"], ["image_2.png"]]

iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title=title,
    description=description,
    article=article,
    examples=examples,
)
iface.launch()