Gemini-PRO-Vision-Chat

Sleeping

App Files Files Community

Gemini-PRO-Vision-Chat / app.py

ysharma HF staff

Update app.py

3b2a6bd 11 months ago

raw

history blame contribute delete

4.05 kB


	# import required packages
	import google.generativeai as genai
	import os
	import PIL.Image
	import gradio as gr
	from gradio_multimodalchatbot import MultimodalChatbot
	from gradio.data_classes import FileData

	# Read the API key from the environment instead of hard-coding it in the source.
	GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
	genai.configure(api_key=GOOGLE_API_KEY)

	# Optional sanity check for the API key when running a local clone of the repo:
	# run the curl command below with your own key substituted in.
	#!curl \
	#-H 'Content-Type: application/json' \
	#-d '{ "prompt": { "text": "Write a very short story about a magic backpack"} }' \
	#"https://generativelanguage.googleapis.com/v1beta3/models/text-bison-001:generateText?key=<enter-your-key-here>"

	# Model handles: `model` serves text-only requests, `modelvis` serves requests with an image.
	model = genai.GenerativeModel("gemini-pro")
	modelvis = genai.GenerativeModel("gemini-pro-vision")

	def _message_text(message):
	    """Return the text of a single chat-history message.

	    History entries are read through a ``.text`` attribute. Plain dicts shaped
	    like the ones ``gemini`` appends (``{"text": ..., "files": [...]}``) are
	    accepted too, so the function can consume its own output.
	    """
	    if isinstance(message, dict):
	        return message.get("text")
	    return message.text


	def _build_messages(chatbot, prompt):
	    """Convert (user, bot) history pairs plus the new prompt into model messages."""
	    messages = []
	    for user, bot in chatbot:
	        messages.append({'role': 'user', 'parts': [_message_text(user)]})
	        messages.append({'role': 'model', 'parts': [_message_text(bot)]})
	    # The new user turn always comes last, whether or not there is history.
	    messages.append({'role': 'user', 'parts': [prompt]})
	    return messages


	def gemini(input, file, chatbot=None):
	    """
	    Send one user turn to the text model or the vision model and record the reply.

	    Parameters:
	    input (str): The input text.
	    file (File): An optional uploaded file; its ``.name`` is opened as an image.
	        When given, the request goes to the vision model as a single turn
	        (earlier history is not sent with it).
	    chatbot (list): (user, bot) message pairs from earlier turns. Defaults to a
	        fresh empty list on every call.

	    Returns:
	    tuple: The chatbot list with the new [user, bot] pair appended, an empty
	        string, and None.

	    Raises:
	    gr.Error: If opening the image or calling the model fails; the original
	        exception is chained as the cause.
	    """
	    # A mutable default would be shared between calls, so create the list here.
	    if chatbot is None:
	        chatbot = []

	    try:
	        if file is not None:
	            # Image request: prompt text and image are sent together as one user turn.
	            with PIL.Image.open(file.name) as img:
	                response = modelvis.generate_content(
	                    [{'role': 'user', 'parts': [input, img]}]
	                )
	            reply = response.text
	            user_files = [{"file": FileData(path=file.name)}]
	        else:
	            # Text-only request: include the previous turns as conversation context.
	            response = model.generate_content(_build_messages(chatbot, input))
	            reply = response.text
	            user_files = []
	    except Exception as e:
	        # Log the failure, then raise gr.Error with the message text.
	        print(f"An error occurred: {e}")
	        raise gr.Error(str(e)) from e

	    # Record the exchange as message dicts with "text" and "files" keys.
	    user_msg = {"text": input, "files": user_files}
	    bot_msg = {"text": reply, "files": []}
	    chatbot.append([user_msg, bot_msg])

	    return chatbot, "", None

	# Build the Gradio Blocks UI: header, chat display, and an input row
	with gr.Blocks() as demo:
	    # Page title, centered with raw HTML
	    gr.HTML("<center><h1>Gemini-PRO & Gemini-PRO-Vision API</h1></center>")

	    # Chat display component, starting with an empty conversation
	    multi = MultimodalChatbot(height=800, value=[])

	    with gr.Row():
	        # Text entry (scale=4 next to the upload button's scale=1)
	        tb = gr.Textbox(placeholder='Input text and press Enter', scale=4)

	        # Upload button restricted to image files
	        up = gr.UploadButton("Upload Image", scale=1, file_types=["image"])

	    # Pressing Enter in the textbox calls gemini with the text, the uploaded file
	    # and the chat history; its three return values update the chat, the textbox
	    # and the upload button in that order.
	    tb.submit(fn=gemini, inputs=[tb, up, multi], outputs=[multi, tb, up])

	    # After an upload, step the button through three labels using chained events
	    (
	        up.upload(fn=lambda: gr.UploadButton("Uploading Image..."), inputs=[], outputs=up)
	        .then(fn=lambda: gr.UploadButton("Image Uploaded"), inputs=[], outputs=up)
	        .then(fn=lambda: gr.UploadButton("Upload Image"), inputs=[], outputs=up)
	    )

	# Enable the request queue, then start the app
	demo.queue().launch()