KaiYuan Tee committed on
Commit
e82112e
•
1 Parent(s): 922b30f
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import subprocess
import sys

# Install the runtime dependencies before the heavy imports further down.
# pip is invoked through the interpreter that is running this script
# (sys.executable), so the packages land in the environment this process will
# import from, rather than in whatever `python`/`pip` happens to be first on
# PATH. Arguments are passed as a list, so no shell is involved.
# NOTE(review): declaring these packages in requirements.txt would avoid
# installing at import time altogether.
for pip_args in (
    ["install", "--upgrade", "pip"],
    ["install", "transformers", "torch"],
):
    # check=False keeps the installs best-effort: a failed install does not
    # abort start-up here; the imports below will surface a real problem.
    subprocess.run([sys.executable, "-m", "pip", *pip_args], check=False)
5
+
6
+ import gradio as gr
7
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
8
+ from PIL import Image
9
+ import requests
10
+
11
# Hub identifiers: the processor comes from one checkpoint and the model
# weights from another.
INFERENCE_PROCESSOR = "microsoft/trocr-base-printed"
INFERENCE_MODEL_NAME = "DunnBC22/trocr-base-printed_captcha_ocr"

# Both objects are created once at import time and reused by process_image.
processor = TrOCRProcessor.from_pretrained(INFERENCE_PROCESSOR)
model = VisionEncoderDecoderModel.from_pretrained(INFERENCE_MODEL_NAME)
16
+
17
+
18
+ # load image examples from the Captcha Dataset
19
+ urls = [
20
+ 'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/sample/2b827.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200825Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=7b0b5a9cb5bc595d44a04e7a0ad2da8659333c76e6576bc06deda7a52f8628e5c72b2a75f6da93076027bb1aa6963a5bfeae2ba9c21462bd8b1cdb378ebd78658d80481afaf0ae0a7f6459a4622828d807b380b5c08e008697856cf5c775418ad2324fdcbcbdc607cf434566bf897cb09b78fc51fa1b580fd328bec4170d9ab311d703cd59f059f996a4d0bf43d4c342823e5d4200681973add8dc8842002c2bad8b36f8fe7992b7f8bb3a64ab2355dde9095488799d0164038428a7eb722b55a9debeee6e5e359c7328f8e89dbdabe3315e8ba5bf8144adcd9705016e0ce3d68ccb525c8b83067ba236e1e86904a8a995bba73e61bc27e3e8d6b194b4fa92d7',
21
+ 'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/2g783.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200821Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=63bcf63cb9833ac0f054034b33c91c775234c2dc54919a9664c7ce811539291c32608afe1b52445fbfd1062e0b6a7423c179677ec6998442c13189a02063fd121841200eddc76f6463f02f86f072ad8d6c3fce8dcb239b0c3fcf4aaa549b4ba968ba4f6f28674b4e700d8ed5818e3888ded9a2e41f44342e426bce1a180aaa0c7d4a25b27753154a6be8c9a3df34475fa226843fb3457d1861c7ac915b869a839dacb38c8dfcef4f4e3846c520838727d86bcf6b9540bc7c0600c03378d78389899f7f983bb6c2268de3d24a10b5beddfd3d7b8d6711b3fda86a885335c7df54f081cc47199991d63a412593514435806f2a031e6592451cad6d81b5afe0c86e',
22
+ 'https://storage.googleapis.com/kagglesdsdata/datasets/894943/1518327/data/train/4gb3f.png?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20230829%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20230829T200857Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=253b2c62f52afe4a001ab6b240230b177f99ac868c58c718b6ecaa6b1fca0f39f3027023cda3e8ae5b38b86ad31bbb79cbd2dff74f7ab33ec86d20d940d2829d7640ee133db87742544496e53c97b2c05dd64fc9f9e29bedd451b2c92e4656a744dd63a982e65d65c889b8b204823038cdc7ef6cc2c99c2d30e52c84293cad03c6e15043fb976db03b96b77018ece09a58691cf8216d9cd0bde3bef25d86464772f9d8a3f001da50a75a20c85541202c49ca6ff2f7d8e9b16a8274427d881ca76874dfac819c5123807f341c24f968efefd81593ec0e04fdc600285b9e4085073b8949c1c0b75b1de2dd90d468fe548290fbd0c922bb5b392a617725d7abfecc'
23
+ ]
24
+
25
from io import BytesIO

# Fetch each example image into image_<idx>.png next to the script.
# The URLs are time-limited signed links, so a download can start failing
# later on. A failure is reported and skipped instead of raising at import
# time, so the demo can still start without that example.
for idx, url in enumerate(urls):
    target = f"image_{idx}.png"

    # Reuse a file that is already on disk (e.g. from an earlier run).
    if os.path.exists(target):
        continue

    try:
        # The timeout stops a stalled connection from hanging start-up.
        response = requests.get(url, timeout=30)
        # Fail on HTTP errors here, before an error page reaches the decoder.
        response.raise_for_status()
        Image.open(BytesIO(response.content)).save(target)
    except (requests.RequestException, OSError) as err:
        # PIL raises OSError subclasses for undecodable or unwritable images.
        print(f"Could not fetch example image {target}: {err}")
28
+
29
def process_image(image):
    """Run the OCR model on one image and return the recognised text.

    Args:
        image: PIL image handed over by the Gradio image input. May be
            ``None`` if the form is submitted without an image.

    Returns:
        The decoded text of the first generated sequence, or an empty string
        when no image was supplied.
    """
    if image is None:
        return ""

    # Normalise to 3-channel RGB before preprocessing: PNG inputs can be
    # palette, grayscale or RGBA, while the TrOCR usage examples feed RGB.
    pixel_values = processor(image.convert("RGB"), return_tensors="pt").pixel_values

    # Generate token ids using the model's default generation settings.
    generated_ids = model.generate(pixel_values)

    # A single image was passed in, so only the first decoded string is used.
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
40
+
41
title = "Interactive Demo: Captcha Dataset"
description = """OrangeFIN Asia CAPTCHA OCR Demo
"""

# Offer only the example images that are actually present on disk, so a
# missing file cannot break construction of the interface.
examples = [
    [path]
    for path in ("image_0.png", "image_1.png", "image_2.png")
    if os.path.isfile(path)
]

# gr.Image / gr.Textbox replace the deprecated gr.inputs.* / gr.outputs.*
# namespaces, which newer Gradio releases no longer provide.
trocr_iface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=gr.Textbox(),
    title=title,
    description=description,
    # An empty list is passed as None so no examples section is requested.
    examples=examples or None,
    theme="gstaff/xkcd",
)

if __name__ == "__main__":
    trocr_iface.launch(debug=True)