Spaces:

7jimmy
/

ask_to_Image

Runtime error

App Files Files Community

ask_to_Image / functions.py

7jimmy

Upload 6 files

1c0296c 11 months ago

raw

history blame contribute delete

2.3 kB

	from transformers import BlipProcessor, BlipForConditionalGeneration, DetrImageProcessor, DetrForObjectDetection
	from PIL import Image
	import torch


	def get_image_caption(image_path: str) -> str:
	    """
	    Generate a short caption for the provided image.

	    The image is read with PIL, converted to RGB and captioned with the
	    "Salesforce/blip-image-captioning-large" BLIP checkpoint on the CPU.
	    Note that the processor and the model are loaded via ``from_pretrained``
	    on every call.

	    Args:
	        image_path (str): The path to the image file.

	    Returns:
	        str: A string representing the caption for the image.
	    """
	    # Open inside a context manager so the underlying file is closed as soon
	    # as the RGB copy has been created, instead of whenever GC gets to it.
	    with Image.open(image_path) as source_image:
	        image = source_image.convert('RGB')

	    model_name = "Salesforce/blip-image-captioning-large"
	    device = "cpu"  # switch to "cuda" to run on a GPU

	    processor = BlipProcessor.from_pretrained(model_name)
	    model = BlipForConditionalGeneration.from_pretrained(model_name).to(device)

	    # Inputs must live on the same device as the model.
	    inputs = processor(image, return_tensors='pt').to(device)
	    # Cap the caption at 20 newly generated tokens.
	    output = model.generate(**inputs, max_new_tokens=20)

	    # Only the first (and only) generated sequence is decoded.
	    caption = processor.decode(output[0], skip_special_tokens=True)

	    return caption


	def detect_objects(image_path: str) -> str:
	    """
	    Detect objects in the provided image.

	    The image is read with PIL, converted to RGB and run through the
	    "facebook/detr-resnet-50" DETR checkpoint. Only detections whose score
	    exceeds 0.9 are kept. The processor and the model are loaded via
	    ``from_pretrained`` on every call.

	    Args:
	        image_path (str): The path to the image file.

	    Returns:
	        str: One line per detected object, formatted as
	            '[x1, y1, x2, y2] class_name confidence_score' and terminated
	            by a newline. The box values are the four entries of the
	            post-processed box truncated to integers; the score is a float.
	            An empty string is returned when nothing passes the threshold.
	    """
	    # Open inside a context manager so the underlying file is closed as soon
	    # as the RGB copy has been created.
	    with Image.open(image_path) as source_image:
	        image = source_image.convert('RGB')

	    processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
	    model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

	    inputs = processor(images=image, return_tensors="pt")
	    # Pure inference: disable autograd so no graph is recorded for the pass.
	    with torch.no_grad():
	        outputs = model(**inputs)

	    # convert outputs (bounding boxes and class logits) to COCO API
	    # let's only keep detections with score > 0.9
	    # PIL's size is (width, height); reversing it yields (height, width).
	    target_sizes = torch.tensor([image.size[::-1]])
	    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

	    # Collect the lines and join once instead of growing a string with +=.
	    lines = []
	    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
	        coords = '[{}, {}, {}, {}]'.format(int(box[0]), int(box[1]), int(box[2]), int(box[3]))
	        class_name = model.config.id2label[int(label)]
	        lines.append('{} {} {}\n'.format(coords, class_name, float(score)))

	    return "".join(lines)


	if __name__ == '__main__':
	    # Manual smoke test: run object detection on one image and print the result.
	    import sys

	    # An optional first command-line argument overrides the sample image path,
	    # so the script is usable outside the original author's machine. With no
	    # argument the behaviour is unchanged.
	    default_image_path = '/home/phillip/Desktop/todays_tutorial/52_langchain_ask_questions_video/code/test.jpg'
	    image_path = sys.argv[1] if len(sys.argv) > 1 else default_image_path
	    detections = detect_objects(image_path)
	    print(detections)