Jeney
/

vilt-b32-finetuned-vqa

Visual Question Answering

Inference Endpoints

Model card Files Files and versions Community

Jeney committed on Jul 1, 2023

Commit

2e2a941

•

1 Parent(s): 050b504

Add all answers in output

Files changed (1) hide show

handler.py +12 -4

handler.py CHANGED Viewed

@@ -13,17 +13,25 @@ class EndpointHandler:
         self.model = ViltForQuestionAnswering.from_pretrained(path)
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, str]:
         # process input
         inputs = data.pop("inputs", data)
         image = inputs["image"]
         image = Image.open(io.BytesIO(eval(image)))
         text = inputs["text"]
         # preprocess
         encoding = self.processor(image, text, return_tensors="pt")
         outputs = self.model(**encoding)
         # postprocess the prediction
         logits = outputs.logits
-        idx = logits.argmax(-1).item()
-        return [{"best_answer": self.model.config.id2label[idx], "outputs": str(outputs)}]

         self.model = ViltForQuestionAnswering.from_pretrained(path)
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         # process input
         inputs = data.pop("inputs", data)
         image = inputs["image"]
         image = Image.open(io.BytesIO(eval(image)))
         text = inputs["text"]
         # preprocess
         encoding = self.processor(image, text, return_tensors="pt")
         outputs = self.model(**encoding)
         # postprocess the prediction
         logits = outputs.logits
+        best_idx = logits.argmax(-1).item()
+        best_answer = self.model.config.id2label[best_idx]
+        probabilities = torch.softmax(logits, dim=-1)[0]
+        id2label = self.model.config.id2label
+        answers = []
+        for idx, prob in enumerate(probabilities):
+            answer = id2label[idx]
+            answer_score = float(prob)
+            answers.append({"answer": answer, "answer_score": answer_score})
+        return {"best_answer": best_answer, "answers": answers}