from transformers import pipeline
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_huggingface import HuggingFaceEndpoint
from PIL import Image

# Vision pipelines, loaded once at import time:
#   pipe1 — object detection (labels + confidence scores)
#   pipe2 — image captioning (free-text description)
pipe1 = pipeline("object-detection", model="facebook/detr-resnet-50")
pipe2 = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")

repo_id = "mistralai/Mistral-7B-Instruct-v0.2"
llm = HuggingFaceEndpoint(
    repo_id=repo_id,
    temperature=0.5,
    streaming=True
)


class Passs(Exception):
    """Signal that a tool produced no usable result, so the fallback runnable runs.

    BUG FIX: this exception was raised below but never defined, so every
    empty-result path surfaced as a NameError instead of the intended signal.
    (The fallback still fired only because ``with_fallbacks`` catches any
    exception by default.)
    """


def reduce_add(a):
    """Summarize detection results as a human-readable count string.

    Only detections with score > 0.89 are counted, grouped by label.
    Returns e.g. "there are \\n2 cat, \\n1 dog"; returns just "there are \\n"
    when nothing passes the threshold.
    """
    counts = {}
    for det in a:
        if det['score'] > 0.89:
            # dict.get with a default replaces the manual `not in` / else dance
            counts[det['label']] = counts.get(det['label'], 0) + 1
    return "there are \n" + ', \n'.join(
        str(n) + ' ' + label for label, n in counts.items()
    )


def image_segmentation_tool(image: str):
    """Detect objects in *image* and return a textual summary.

    Raises Passs when no confident detections exist, which triggers the
    captioning fallback in the chain below.
    """
    # image = Image.open(image_path)
    # Compute the summary once (the original called reduce_add twice).
    summary = reduce_add(pipe1(image))
    if summary == "there are \n":
        raise Passs()
    return summary


def image_caption_tool(image: str):
    """Caption *image*; fallback tool when object detection finds nothing.

    Raises Passs when the caption comes back empty.
    """
    # image = Image.open(image_path)
    results = pipe2(image)
    if results[0]["generated_text"] == "":
        raise Passs("no result found use different image to create story")
    return results[0]["generated_text"]


def story_generation_tool(segmentation_results):
    """Ask the LLM to write a short story from the image description."""
    prompt_template = """
    You are a storyteller. Based on the following segmentation results, create a story:

    {segmentation_results}

    Story:
    """
    prompt = PromptTemplate.from_template(prompt_template)
    story = prompt | llm
    return story.invoke(input={"segmentation_results": segmentation_results})


# def translation_tool(english_text):
#     prompt_template = """
#     You are a translator.
#     Translate the following English text to Hindi:
#     {english_text}
#     Translation:
#     """
#     prompt = PromptTemplate.from_template(prompt_template)
#     translation = prompt | llm
#     return translation.invoke(input={"english_text": english_text})

# Wiring: try object detection first; any exception from it (e.g. no confident
# detections) falls back to captioning. Either result feeds the story LLM.
runnable = RunnableLambda(image_segmentation_tool).with_fallbacks(
    [RunnableLambda(image_caption_tool)]
)
runnable2 = RunnableLambda(story_generation_tool)
# runnable3 = RunnableLambda(translation_tool)
chain = runnable | runnable2

import gradio as gr

title = "Image to short Story Generator"
description = """
Upload an image, and this app will generate a short story based on the image.
"""


async def sepia(input_img):
    # NOTE(review): the name looks like a leftover from a sepia-filter demo;
    # this coroutine actually streams the generated story, token by token,
    # re-yielding the accumulated text so Gradio updates the textarea live.
    text = ""
    async for chunk in chain.astream(input_img):
        text += chunk
        yield text


demo = gr.Interface(
    sepia,
    gr.Image(type='pil'),
    "textarea",
    title=title,
    description=description,
    live=True,
)

if __name__ == "__main__":
    demo.launch()