Spaces:
Runtime error
Runtime error
KaiYuan Tee
committed on
Commit
•
e82112e
1
Parent(s):
922b30f
init
Browse files
app.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
import subprocess
import sys

# Install runtime dependencies before the heavy imports below.
# NOTE(review): installing at start-up is slow and fragile; declaring these
# packages in a requirements file is the more usual approach. The start-up
# install is kept here so the script's behaviour is unchanged.
#
# pip is invoked through the interpreter that is running this script so the
# packages land in the environment that will import them, and the command is
# passed as an argument list so no shell is involved. As before, a failed
# install is not fatal (the return code is ignored).
for _pip_args in (
    ["install", "--upgrade", "pip"],
    ["install", "transformers", "torch"],
):
    subprocess.run([sys.executable, "-m", "pip", *_pip_args], check=False)
|
5 |
+
|
6 |
+
import gradio as gr
|
7 |
+
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
|
8 |
+
from PIL import Image
|
9 |
+
import requests
|
10 |
+
|
11 |
+
# Hugging Face Hub identifiers: the pre-processing configuration is taken from
# one checkpoint and the model weights from another.
INFERENCE_PROCESSOR = "microsoft/trocr-base-printed"
INFERENCE_MODEL_NAME = "DunnBC22/trocr-base-printed_captcha_ocr"

# Both objects are created once at import time and reused by process_image.
model = VisionEncoderDecoderModel.from_pretrained(INFERENCE_MODEL_NAME)
processor = TrOCRProcessor.from_pretrained(INFERENCE_PROCESSOR)
|
16 |
+
|
17 |
+
|
18 |
+
# Example images from the Captcha Dataset.
# NOTE: these are signed URLs carrying an X-Goog-Expires parameter, so they
# stop working after a while; the download loop below tolerates that.
urls = [
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/sample/2b827.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200825Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=7b0b5a9cb5bc595d44a04e7a0ad2da8659333c76e6576bc06deda7a52f8628e5c72b2a75f6da93076027bb1aa6963a5bfeae2ba9c21462bd8b1cdb378ebd78658d80481afaf0ae0a7f6459a4622828d807b380b5c08e008697856cf5c775418ad2324fdcbcbdc607cf434566bf897cb09b78fc51fa1b580fd328bec4170d9ab311d703cd59f059f996a4d0bf43d4c342823e5d4200681973add8dc8842002c2bad8b36f8fe7992b7f8bb3a64ab2355dde9095488799d0164038428a7eb722b55a9debeee6e5e359c7328f8e89dbdabe3315e8ba5bf8144adcd9705016e0ce3d68ccb525c8b83067ba236e1e86904a8a995bba73e61bc27e3e8d6b194b4fa92d7',
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/2g783.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200821Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=63bcf63cb9833ac0f054034b33c91c775234c2dc54919a9664c7ce811539291c32608afe1b52445fbfd1062e0b6a7423c179677ec6998442c13189a02063fd121841200eddc76f6463f02f86f072ad8d6c3fce8dcb239b0c3fcf4aaa549b4ba968ba4f6f28674b4e700d8ed5818e3888ded9a2e41f44342e426bce1a180aaa0c7d4a25b27753154a6be8c9a3df34475fa226843fb3457d1861c7ac915b869a839dacb38c8dfcef4f4e3846c520838727d86bcf6b9540bc7c0600c03378d78389899f7f983bb6c2268de3d24a10b5beddfd3d7b8d6711b3fda86a885335c7df54f081cc47199991d63a412593514435806f2a031e6592451cad6d81b5afe0c86e',
    'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/4gb3f.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200857Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=253b2c62f52afe4a001ab6b240230b177f99ac868c58c718b6ecaa6b1fca0f39f3027023cda3e8ae5b38b86ad31bbb79cbd2dff74f7ab33ec86d20d940d2829d7640ee133db87742544496e53c97b2c05dd64fc9f9e29bedd451b2c92e4656a744dd63a982e65d65c889b8b204823038cdc7ef6cc2c99c2d30e52c84293cad03c6e15043fb976db03b96b77018ece09a58691cf8216d9cd0bde3bef25d86464772f9d8a3f001da50a75a20c85541202c49ca6ff2f7d8e9b16a8274427d881ca76874dfac819c5123807f341c24f968efefd81593ec0e04fdc600285b9e4085073b8949c1c0b75b1de2dd90d468fe548290fbd0c922bb5b392a617725d7abfecc'
]

# Save each example locally as image_<idx>.png. A failed download (expired
# link, network error, response body that is not an image) is reported and
# skipped instead of aborting start-up, since the examples are optional.
for idx, url in enumerate(urls):
    try:
        # The timeout keeps an unresponsive host from hanging start-up forever.
        response = requests.get(url, stream=True, timeout=30)
        # Turn HTTP error statuses into exceptions before decoding the body.
        response.raise_for_status()
        image = Image.open(response.raw)
        image.save(f"image_{idx}.png")
    except (requests.RequestException, OSError) as exc:
        # PIL signals an undecodable image with an OSError subclass.
        print(f"Skipping example image {idx}: {exc}")
|
28 |
+
|
29 |
+
def process_image(image):
    """Run OCR on one image and return the recognised text.

    Args:
        image: The image to transcribe. The interface in this file is
            configured to pass a PIL image; ``None`` is accepted and treated
            as "no image supplied".

    Returns:
        The decoded text of the first (only) generated sequence, or an empty
        string when ``image`` is ``None``.
    """
    # The UI may invoke the callback without an image; return an empty result
    # rather than failing inside the processor.
    if image is None:
        return ""

    # Normalise PIL inputs to RGB so palette, grayscale or RGBA images from
    # direct callers are handled the same way as RGB ones. Non-PIL inputs are
    # passed through untouched.
    if isinstance(image, Image.Image) and image.mode != "RGB":
        image = image.convert("RGB")

    # Prepare the image as a PyTorch tensor batch.
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate token ids with the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # Decode the batch and return the text of its single entry.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
40 |
+
|
41 |
+
# Text shown at the top of the demo page.
title = "Interactive Demo: Captcha Dataset"
description = """OrangeFIN Asia CAPTCHA OCR Demo
"""

# Example inputs offered in the UI, one single-element row per image. Only
# files that are actually present on disk are listed, so a missing example
# image does not leave the interface pointing at a nonexistent path.
examples = [
    [path]
    for path in ("image_0.png", "image_1.png", "image_2.png")
    if os.path.exists(path)
]
|
48 |
+
|
49 |
+
# Build the web UI: one image input handed to process_image as a PIL image,
# one text output. The top-level gr.Image / gr.Textbox components are used
# because the gr.inputs.* / gr.outputs.* namespaces are deprecated and absent
# from later Gradio releases.
trocr_iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title=title,
    description=description,
    examples=examples,
    theme='gstaff/xkcd',
)

# Start the server only when executed as a script, not when imported.
if __name__ == "__main__":
    trocr_iface.launch(debug=True)
|