KingNish committed on
Commit cd4b4e1
Parent: 92d81f0

Update chatbot.py

Files changed (1):
  1. chatbot.py  +5 -12
chatbot.py CHANGED
@@ -27,8 +27,8 @@ import io # Add this import for working with image bytes
 
 # You can also use models that are commented below
 # model_id = "llava-hf/llava-interleave-qwen-0.5b-hf"
-model_id = "llava-hf/llava-interleave-qwen-7b-hf"
-# model_id = "llava-hf/llava-interleave-qwen-7b-dpo-hf"
+# model_id = "llava-hf/llava-interleave-qwen-7b-hf"
+model_id = "llava-hf/llava-interleave-qwen-7b-dpo-hf"
 processor = LlavaProcessor.from_pretrained(model_id)
 model = LlavaForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, use_flash_attention_2=True, low_cpu_mem_usage=True)
 model.to("cuda")
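For context, a minimal sketch of how the newly selected DPO checkpoint is typically driven; the image path and question are placeholders, not taken from this repo. (Recent transformers releases also prefer attn_implementation="flash_attention_2" over the use_flash_attention_2 flag kept here.)

import torch
from PIL import Image
from transformers import LlavaProcessor, LlavaForConditionalGeneration

model_id = "llava-hf/llava-interleave-qwen-7b-dpo-hf"
processor = LlavaProcessor.from_pretrained(model_id)
model = LlavaForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
).to("cuda")

# Placeholder inputs: one image plus a ChatML-style prompt with an <image> slot.
image = Image.open("example.jpg")
prompt = "<|im_start|>user <image>\nDescribe this image.<|im_end|><|im_start|>assistant"

inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda", torch.float16)
output = model.generate(**inputs, max_new_tokens=100)
# Decode only the newly generated tokens, skipping the echoed prompt.
print(processor.decode(output[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))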
@@ -38,14 +38,7 @@ def sample_frames(video_file, num_frames) :
     try:
         video = cv2.VideoCapture(video_file)
         total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
-        fps = int(video.get(cv2.CAP_PROP_FPS))
-        # extracts 5 images/sec of video
-        if (total_frames/fps) < 3:
-            num_frames = 12
-        elif (total_frames/fps) > 5:
-            num_frames = 24
-        else:
-            num_frames = ((total_frames//fps)*5)
+        num_frames = 12
         interval = total_frames // num_frames
         frames = []
         for i in range(total_frames):
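Putting the hunk in context: a sketch of how sample_frames reads after this change. The diff does not show the signature being updated, so the num_frames=12 default below is an assumption needed to keep the commit's new one-argument call site, sample_frames(image), from raising a TypeError; the loop body and cleanup are likewise reconstructed from typical OpenCV usage, not shown in this diff.

import cv2
from PIL import Image

def sample_frames(video_file, num_frames=12):  # default assumed, not in the diff
    try:
        video = cv2.VideoCapture(video_file)
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        num_frames = 12  # fixed sample count replaces the old fps-based logic
        interval = max(total_frames // num_frames, 1)  # max() guard for very short clips (assumed)
        frames = []
        for i in range(total_frames):
            ret, frame = video.read()
            if not ret:
                continue
            if i % interval == 0:
                # OpenCV decodes BGR; convert before handing PIL images to the processor.
                frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
        video.release()
        return frames
    except Exception:
        # The diff shows a try: with no except; an empty list is one plausible fallback.
        return []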
@@ -199,7 +192,7 @@ client_mixtral = InferenceClient("NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")
 client_mistral = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 generate_kwargs = dict( max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False )
 
-system_llava = "<|im_start|>system\nYou are OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Your task is to fulfill users query in best possible way. You are provided with image, videos and 3d structures as input with question your task is to give best possible result and explaination to user.<|im_end|>"
+system_llava = "<|im_start|>system\nYou are OpenGPT 4o, an exceptionally capable and versatile AI assistant meticulously crafted by KingNish. Your task is to fulfill users query in best possible way. You are provided with image, videos and 3d structures as input with question your task is to give best possible detailed result and explaination to user.<|im_end|>"
 
 @spaces.GPU(duration=60, queue=False)
 def model_inference( user_prompt, chat_history, web_search):
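As an aside, a minimal sketch of how the generate_kwargs defined in this hunk pair with huggingface_hub's streaming text_generation API; the prompt is a placeholder.

from huggingface_hub import InferenceClient

client_mistral = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
generate_kwargs = dict(max_new_tokens=4000, do_sample=True, stream=True, details=True, return_full_text=False)

# With stream=True and details=True, text_generation yields token-level chunks.
for chunk in client_mistral.text_generation("Hello!", **generate_kwargs):
    if not chunk.token.special:
        print(chunk.token.text, end="", flush=True)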
@@ -256,7 +249,7 @@ def model_inference( user_prompt, chat_history, web_search):
     image_extensions = tuple([ex for ex, f in image_extensions.items()])
 
     if image.endswith(video_extensions):
-        image = sample_frames(image, 12)
+        image = sample_frames(image)
         image_tokens = "<image>" * int(len(image))
         prompt = f"<|im_start|>user {image_tokens}\n{user_prompt}<|im_end|><|im_start|>assistant"
 
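Tying the two changed call sites together, a hedged sketch of the video path after this commit, reusing the processor and sample_frames sketches above. It assumes the signature fix noted earlier, and that system_llava is prepended to the final prompt elsewhere in the file (not shown in this diff); the clip name and question are placeholders.

frames = sample_frames("clip.mp4")      # now always samples 12 frames
image_tokens = "<image>" * len(frames)  # one placeholder token per frame
prompt = f"<|im_start|>user {image_tokens}\nWhat happens in this video?<|im_end|><|im_start|>assistant"
inputs = processor(text=system_llava + prompt, images=frames, return_tensors="pt").to("cuda", torch.float16)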