"""Gradio app that captions an image and lists the objects detected in it.

Provenance: ``app.py`` of the Hugging Face Space ``Satyacoder/vision_test``
(commit 8602d39).

The app takes an image URL as text input, runs an object-detection model and
an image-captioning model on the downloaded image, and returns both results
as JSON.
"""

import io

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import (
    BlipForConditionalGeneration,
    BlipProcessor,
    DetrForObjectDetection,
    DetrImageProcessor,
)

DETECTION_MODEL_ID = "facebook/detr-resnet-50"
CAPTION_MODEL_ID = "Salesforce/blip-image-captioning-large"

# Detections scoring below this value are dropped during post-processing.
DETECTION_THRESHOLD = 0.70

# Without a timeout `requests` waits forever on an unresponsive host, which
# would block the worker serving the request.
REQUEST_TIMEOUT_SECONDS = 30

# Models are loaded once at import time so every request reuses them.
box_processor = DetrImageProcessor.from_pretrained(DETECTION_MODEL_ID)
box_model = DetrForObjectDetection.from_pretrained(DETECTION_MODEL_ID)

caption_processor = BlipProcessor.from_pretrained(CAPTION_MODEL_ID)
caption_model = BlipForConditionalGeneration.from_pretrained(CAPTION_MODEL_ID)


def predict_bounding_boxes(imageurl: str) -> dict:
    """Download an image and return its caption and detected objects.

    Args:
        imageurl: URL of the image to analyse.

    Returns:
        On success, a dict with two keys: ``"image label"`` (the generated
        caption) and ``"detections"`` (a list of dicts with ``"score"``,
        ``"label"`` and ``"box"``). On any failure, a dict with the single
        key ``"error"`` holding the exception message; no exception
        propagates to the caller.
    """
    try:
        # NOTE(review): the URL is caller-supplied and fetched server-side;
        # confirm whether the deployment needs to restrict reachable hosts.
        response = requests.get(imageurl, timeout=REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()

        # `response.content` is fully read and content-decoded, unlike the
        # raw socket stream, and the connection is released once it is read.
        # Converting to RGB up front gives both models a 3-channel image even
        # when the source is RGBA, greyscale or palette-based.
        image = Image.open(io.BytesIO(response.content)).convert("RGB")

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            box_inputs = box_processor(images=image, return_tensors="pt")
            box_outputs = box_model(**box_inputs)

            # PIL reports (width, height); the reversed pair is what gets
            # passed to post-processing as the target size.
            target_sizes = torch.tensor([image.size[::-1]])
            results = box_processor.post_process_object_detection(
                box_outputs,
                target_sizes=target_sizes,
                threshold=DETECTION_THRESHOLD,
            )[0]

            caption_inputs = caption_processor(image, return_tensors="pt")
            caption_ids = caption_model.generate(**caption_inputs)

        detections = [
            {
                "score": score.item(),
                "label": box_model.config.id2label[label.item()],
                "box": box.tolist(),
            }
            for score, label, box in zip(
                results["scores"], results["labels"], results["boxes"]
            )
        ]
        label = caption_processor.decode(caption_ids[0], skip_special_tokens=True)

        return {"image label": label, "detections": detections}
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure as JSON
        # instead of letting the request crash.
        return {"error": str(e)}


app = gr.Interface(fn=predict_bounding_boxes, inputs="text", outputs="json")
# NOTE(review): kept from the original; presumably meant to expose the API —
# confirm that Gradio actually reads this attribute.
app.api = True
app.launch()