TusharGoel
committed on
Commit
•
17a9f25
1
Parent(s):
7bb3b36
Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,64 @@
|
|
1 |
---
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: mit
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
library_name: transformers
|
6 |
+
pipeline_tag: document-question-answering
|
7 |
---
|
8 |
+
|
9 |
+
Fine-tuned on the DocVQA dataset (40,000 questions).
|
10 |
+
|
11 |
+
```python
|
12 |
+
import json
|
13 |
+
from glob import glob
|
14 |
+
from transformers import AutoProcessor, AutoModelForDocumentQuestionAnswering
|
15 |
+
|
16 |
+
import torch
|
17 |
+
import numpy as np
|
18 |
+
|
19 |
+
# Hub identifier of the fine-tuned checkpoint; both objects below are loaded from it.
model_name = "TusharGoel/LayoutLMv2-finetuned-docvqa"

# The two loads are independent of each other; each one only needs the checkpoint name.
model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)
|
22 |
+
|
23 |
+
|
24 |
+
def _masked_softmax(logits, keep):
    """Return a softmax over `logits` with positions where `keep` is False suppressed."""
    masked = np.where(keep, logits, -10000.0)
    # Subtracting the maximum keeps np.exp from overflowing on large logits.
    exps = np.exp(masked - masked.max())
    return exps / exps.sum()


def pipeline(question, words, boxes, **kwargs):
    """Answer `question` about a single document page.

    Args:
        question: The question text.
        words: The words of the page; the answer is a contiguous slice of
            this list joined with spaces.
        boxes: One bounding box per entry in `words`, forwarded to the
            processor unchanged.
        **kwargs: Must contain ``images`` (the page image passed to the
            processor). May contain ``max_answer_len`` (default 20), the
            maximum answer span length measured in tokens.

    Returns:
        A ``(answer, score)`` tuple. ``score`` is the product of the start
        and end probabilities of the selected span. If anything fails while
        encoding or running the model, the failure is logged and
        ``("", 0.0)`` is returned.

    Raises:
        KeyError: If ``images`` is not supplied in `kwargs`.
    """
    import logging

    images = kwargs["images"]
    max_answer_len = max(1, int(kwargs.get("max_answer_len", 20)))
    try:
        encoding = processor(
            images,
            question,
            words,
            boxes=boxes,
            return_token_type_ids=True,
            return_tensors="pt",
            truncation=True,
        )
        word_ids = encoding.word_ids(0)
        sequence_ids = encoding.sequence_ids(0)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            outputs = model(**encoding)

        start_logits = outputs.start_logits[0].detach().cpu().numpy()
        end_logits = outputs.end_logits[0].detach().cpu().numpy()

        # Only tokens of the second sequence (the page words) can be part of
        # the answer: special tokens have no word id, and the word ids of
        # question tokens do not index into `words`.
        attended = encoding["attention_mask"][0].detach().cpu().numpy() != 0
        in_context = np.array(
            [seq == 1 and wid is not None for seq, wid in zip(sequence_ids, word_ids)],
            dtype=bool,
        )
        answerable = attended & in_context
        if not answerable.any():
            return "", 0.0

        start_probs = _masked_softmax(start_logits, answerable)
        end_probs = _masked_softmax(end_logits, answerable)

        # Score every (start, end) pair, then keep only spans with
        # start <= end and at most `max_answer_len` tokens.
        candidates = np.tril(
            np.triu(np.outer(start_probs, end_probs)), max_answer_len - 1
        )
        start_tok, end_tok = np.unravel_index(
            np.argmax(candidates), candidates.shape
        )

        # The answer text and the score both come from the same best span.
        score = float(candidates[start_tok, end_tok])
        answer = " ".join(words[word_ids[start_tok] : word_ids[end_tok] + 1])
    except Exception:
        # Best-effort contract: report the failure rather than hide it, but
        # still return an empty answer to the caller.
        logging.getLogger(__name__).exception(
            "Document question answering failed"
        )
        answer, score = "", 0.0
    return answer, score
|
64 |
+
```
|