mrfakename committed on
Commit
db8cf0b
1 Parent(s): 876e3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -11,11 +11,12 @@ Overview of TB-OCR:
11
  """
12
  # check out https://huggingface.co/microsoft/Phi-3.5-vision-instruct for more details
13
 
14
- import torch
15
  from transformers import AutoModelForCausalLM, AutoProcessor
16
  from PIL import Image
17
  import requests
18
-
 
19
  model_id = "yifeihu/TB-OCR-preview-0.1"
20
 
21
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -27,8 +28,8 @@ model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  device_map=DEVICE,
29
  trust_remote_code=True,
30
- torch_dtype="auto",
31
- # _attn_implementation='flash_attention_2',
32
  #load_in_4bit=True # Optional: Load model in 4-bit mode to save memory
33
  )
34
 
@@ -36,7 +37,7 @@ processor = AutoProcessor.from_pretrained(model_id,
36
  trust_remote_code=True,
37
  num_crops=16
38
  )
39
-
40
  def phi_ocr(image_url):
41
  question = "Convert the text to markdown format."
42
  image = Image.open(image_url)
 
11
  """
12
  # check out https://huggingface.co/microsoft/Phi-3.5-vision-instruct for more details
13
 
14
+ import torch, spaces
15
  from transformers import AutoModelForCausalLM, AutoProcessor
16
  from PIL import Image
17
  import requests
18
+ import os
19
+ os.system('pip install -U flash-attn')
20
  model_id = "yifeihu/TB-OCR-preview-0.1"
21
 
22
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
28
  model_id,
29
  device_map=DEVICE,
30
  trust_remote_code=True,
31
+ torch_dtype="auto",
32
+ _attn_implementation='flash_attention_2',
33
  #load_in_4bit=True # Optional: Load model in 4-bit mode to save memory
34
  )
35
 
 
37
  trust_remote_code=True,
38
  num_crops=16
39
  )
40
+ @spaces.GPU
41
  def phi_ocr(image_url):
42
  question = "Convert the text to markdown format."
43
  image = Image.open(image_url)