mrfakename committed on
Commit
db8cf0b
1 Parent(s): 876e3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -11,11 +11,12 @@ Overview of TB-OCR:
11
  """
12
  # check out https://huggingface.co/microsoft/Phi-3.5-vision-instruct for more details
13
 
14
- import torch
15
  from transformers import AutoModelForCausalLM, AutoProcessor
16
  from PIL import Image
17
  import requests
18
-
 
19
  model_id = "yifeihu/TB-OCR-preview-0.1"
20
 
21
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -27,8 +28,8 @@ model = AutoModelForCausalLM.from_pretrained(
27
  model_id,
28
  device_map=DEVICE,
29
  trust_remote_code=True,
30
- torch_dtype="auto",
31
- # _attn_implementation='flash_attention_2',
32
  #load_in_4bit=True # Optional: Load model in 4-bit mode to save memory
33
  )
34
 
@@ -36,7 +37,7 @@ processor = AutoProcessor.from_pretrained(model_id,
36
  trust_remote_code=True,
37
  num_crops=16
38
  )
39
-
40
  def phi_ocr(image_url):
41
  question = "Convert the text to markdown format."
42
  image = Image.open(image_url)
 
11
  """
12
  # check out https://huggingface.co/microsoft/Phi-3.5-vision-instruct for more details
13
 
14
+ import torch, spaces
15
  from transformers import AutoModelForCausalLM, AutoProcessor
16
  from PIL import Image
17
  import requests
18
+ import os
19
+ os.system('pip install -U flash-attn')
20
  model_id = "yifeihu/TB-OCR-preview-0.1"
21
 
22
  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
28
  model_id,
29
  device_map=DEVICE,
30
  trust_remote_code=True,
31
+ torch_dtype="auto",
32
+ _attn_implementation='flash_attention_2',
33
  #load_in_4bit=True # Optional: Load model in 4-bit mode to save memory
34
  )
35
 
 
37
  trust_remote_code=True,
38
  num_crops=16
39
  )
40
+ @spaces.GPU
41
  def phi_ocr(image_url):
42
  question = "Convert the text to markdown format."
43
  image = Image.open(image_url)