Spaces:

YiYiXu
/

it-happened-one-frame-2

Runtime error

App Files Files Community

yiyixuxu committed on Jun 14, 2022

Commit

96f1e87

•

1 Parent(s): 9855e99

add article

Browse files

Files changed (1) hide show

app.py +36 -37

app.py CHANGED Viewed

@@ -17,9 +17,9 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32")
-def select_video_format(url, format_note='240p', ext='mp4', max_size = 50000000):
     defaults = ['480p', '360p','240p','144p']
-    ydl_opts = {}
     ydl = youtube_dl.YoutubeDL(ydl_opts)
     info_dict = ydl.extract_info(url, download=False)
     formats = info_dict.get('formats', None)
@@ -28,21 +28,17 @@ def select_video_format(url, format_note='240p', ext='mp4', max_size = 50000000)
                and f['vcodec'].split('.')[0] != 'av01'
                and f['filesize'] is not None and f['filesize'] <= max_size]
     available_format_notes = set([f['format_note'] for f in formats])
-    try:
-        if format_note not in available_format_notes:
-            format_note = [d for d in defaults if d in available_format_notes][0]
-        formats = [f for f in formats if f['format_note'] == format_note]
-        format = formats[0]
-        format_id = format.get('format_id', None)
-        fps = format.get('fps', None)
-        print(f'format selected: {format}')
-    except IndexError as err:
-        print(f"can't find suitable video formats. we are not able to process video larger than 95 Mib at the moment")
-        format, format_id, fps = None, None, None
     return(format, format_id, fps)
-# to-do: delete saved videos
 def download_video(url):
     # create "videos" folder for saved videos
     path_videos = Path('videos')
@@ -58,23 +54,24 @@ def download_video(url):
                 path_video.unlink()
                 print(f'removed video {path_video}')
     # select format to download for given video
-    # by default select 480p and .mp4
-    format, format_id, fps = select_video_format(url)
-    if format_id is not None:
-        ydl_opts = {
         'format':format_id,
         'outtmpl': "videos/%(id)s.%(ext)s"}
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            try:
-                ydl.cache.remove()
-                meta = ydl.extract_info(url)
-                save_location = 'videos/' + meta['id'] + '.' + meta['ext']
-            except youtube_dl.DownloadError as error:
-                print(f'error with download_video function: {error}')
-                save_location = None
-    else:
-        fps, save_location = None, None
     return(fps, save_location)
 def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
@@ -90,7 +87,6 @@ def process_video_parallel(video, skip_frames, dest_path, num_processes, process
         if count  % skip_frames ==0:
           filename =f"{dest_path}/{count}.jpg"
           cv2.imwrite(filename, frame)
-          #print(f"saved {filename}")
         count += 1
     cap.release()
@@ -136,13 +132,14 @@ def captioned_strip(images, caption=None, times=None, rows=1):
         draw.text((60, 3), caption, (255, 255, 255), font=font)
         for i,ts in enumerate(times):
           draw.text((
-              (i % rows) * w + 40 , #column poistion
-               i // rows * h  + 33) # row position
           , ts,
           (255, 255, 255), font=font_small)
     return img
 def run_inference(url, sampling_interval, search_query, bs=526):
     skip_frames, path_frames= vid2frames(url,sampling_interval)
     if path_frames is not None:
       filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
@@ -173,9 +170,10 @@ def run_inference(url, sampling_interval, search_query, bs=526):
       similarity = (100.0 * image_features @ text_features.T)
       values, indices = similarity.topk(4, dim=0)
       best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
-      times = [f'{datetime.timedelta(seconds = ind[0].item() * sampling_interval)}' for ind in indices]
       image_output = captioned_strip(best_frames,search_query, times,2)
       title = search_query
       print('task complete')
@@ -184,7 +182,7 @@ def run_inference(url, sampling_interval, search_query, bs=526):
       image_output = None
     return(title, image_output)
-inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video! ( maximum size 50 MB)"),
           gr.Number(5,label='sampling interval (seconds)'),
           gr.inputs.Textbox(label="What do you want to search?")]
 outputs = [
@@ -192,7 +190,7 @@ outputs = [
     gr.outputs.Image(label=""),
 ]
-example_videos = ['v1rkzUIL8oc', 'k4R5wZs8cxI','0diCvgWv_ng']
 gr.Interface(
     run_inference,
@@ -200,10 +198,11 @@ gr.Interface(
     outputs=outputs,
     title="It Happened One Frame",
     description='A CLIP-based app that search YouTube video frame based on text',
     examples=[
         ['https://youtu.be/v1rkzUIL8oc', 1, "James Cagney dancing down the stairs"],
         ['https://youtu.be/k4R5wZs8cxI', 1, "James Cagney smashes a grapefruit into Mae Clarke's face"],
         ['https://youtu.be/0diCvgWv_ng', 1, "little Deborah practicing her ballet while wearing a tutu in empty restaurant"]
     ]
-).launch(debug=True,enable_queue=True)

 model, preprocess = clip.load("ViT-B/32")
+def select_video_format(url, ydl_opts={}, format_note='240p', ext='mp4', max_size = 500000000):
     defaults = ['480p', '360p','240p','144p']
+    ydl_opts = ydl_opts
     ydl = youtube_dl.YoutubeDL(ydl_opts)
     info_dict = ydl.extract_info(url, download=False)
     formats = info_dict.get('formats', None)
                and f['vcodec'].split('.')[0] != 'av01'
                and f['filesize'] is not None and f['filesize'] <= max_size]
     available_format_notes = set([f['format_note'] for f in formats])
+    if format_note not in available_format_notes:
+      format_note = [d for d in defaults if d in available_format_notes][0]
+    formats = [f for f in formats if f['format_note'] == format_note]
+    format = formats[0]
+    format_id = format.get('format_id', None)
+    fps = format.get('fps', None)
+    print(f'format selected: {format}')
     return(format, format_id, fps)
 def download_video(url):
     # create "videos" folder for saved videos
     path_videos = Path('videos')
                 path_video.unlink()
                 print(f'removed video {path_video}')
     # select format to download for given video
+    # by default select 240p and .mp4
+    try:
+      format, format_id, fps = select_video_format(url)
+      ydl_opts = {
         'format':format_id,
         'outtmpl': "videos/%(id)s.%(ext)s"}
+      with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        try:
+          ydl.cache.remove()
+          meta = ydl.extract_info(url)
+          save_location = 'videos/' + meta['id'] + '.' + meta['ext']
+        except youtube_dl.DownloadError as error:
+          print(f'error with download_video function: {error}')
+          save_location = None
+    except IndexError as err:
+      print(f"can't find suitable video formats. we are not able to process video larger than 95 Mib at the moment")
+      fps, save_location = None, None
     return(fps, save_location)
 def process_video_parallel(video, skip_frames, dest_path, num_processes, process_number):
         if count  % skip_frames ==0:
           filename =f"{dest_path}/{count}.jpg"
           cv2.imwrite(filename, frame)
         count += 1
     cap.release()
         draw.text((60, 3), caption, (255, 255, 255), font=font)
         for i,ts in enumerate(times):
           draw.text((
+              (i // rows) * w + 40 , #column position
+               i % rows * h  + 33) # row position
           , ts,
           (255, 255, 255), font=font_small)
     return img
 def run_inference(url, sampling_interval, search_query, bs=526):
+    print(f"search for : {search_query}")
     skip_frames, path_frames= vid2frames(url,sampling_interval)
     if path_frames is not None:
       filenames = sorted(path_frames.glob('*.jpg'),key=lambda p: int(p.stem))
       similarity = (100.0 * image_features @ text_features.T)
       values, indices = similarity.topk(4, dim=0)
+      print(f"indices for best matches{indices}")
+      print(f"filenames for best matches {[filenames[i]for i in indices]}")
       best_frames = [Image.open(filenames[ind]).convert("RGB") for ind in indices]
+      times = [f'{datetime.timedelta(seconds = round(ind[0].item() * sampling_interval,2))}' for ind in indices]
       image_output = captioned_strip(best_frames,search_query, times,2)
       title = search_query
       print('task complete')
       image_output = None
     return(title, image_output)
+inputs = [gr.inputs.Textbox(label="Give us the link to your youtube video! (maximum size 50 MB)"),
           gr.Number(5,label='sampling interval (seconds)'),
           gr.inputs.Textbox(label="What do you want to search?")]
 outputs = [
     gr.outputs.Image(label=""),
 ]
+article = "Check out [this blogpost](https://yiyixuxu.github.io/2022/06/12/It-Happened-One-Frame.html) about this app."
 gr.Interface(
     run_inference,
     outputs=outputs,
     title="It Happened One Frame",
     description='A CLIP-based app that search YouTube video frame based on text',
+    article = article,
     examples=[
         ['https://youtu.be/v1rkzUIL8oc', 1, "James Cagney dancing down the stairs"],
         ['https://youtu.be/k4R5wZs8cxI', 1, "James Cagney smashes a grapefruit into Mae Clarke's face"],
         ['https://youtu.be/0diCvgWv_ng', 1, "little Deborah practicing her ballet while wearing a tutu in empty restaurant"]
     ]
+).launch(debug=True,enable_queue=True,share=True)