# Spaces: Runtime error
import spotipy | |
from spotipy.oauth2 import SpotifyClientCredentials | |
import os | |
import json | |
from pathlib import Path | |
import numpy as np | |
import pandas as pd | |
from tqdm import tqdm | |
from utils import download_song | |
import time | |
def set_env():
    """Export the Spotify API credentials as Spotipy environment variables.

    Reads ``auth/spotify.json`` from the directory containing this file and
    copies its ``client_id`` and ``client_secret`` entries into
    ``SPOTIPY_CLIENT_ID`` and ``SPOTIPY_CLIENT_SECRET``. The redirect URI is
    set to a fixed localhost callback.
    """
    credentials_path = os.path.join(
        os.path.dirname(__file__), "auth", "spotify.json"
    )
    with open(credentials_path, "r") as handle:
        credentials = json.load(handle)

    # Environment variable name -> key inside the JSON credentials file.
    for env_name, config_key in (
        ("SPOTIPY_CLIENT_ID", "client_id"),
        ("SPOTIPY_CLIENT_SECRET", "client_secret"),
    ):
        os.environ[env_name] = credentials[config_key]

    os.environ["SPOTIPY_REDIRECT_URI"] = "https://localhost:8080/callback"
# Runs at import time so the Spotipy credential environment variables are
# populated before any function below is called.
set_env()
def get_song_preview_url(song_name: str, spotify: spotipy.Spotify, artist: str | None = None) -> str | None:
    """Search Spotify for a track and return the first hit's preview URL.

    Args:
        song_name: Track title to search for.
        spotify: Client whose ``search`` method is queried.
        artist: Optional artist name added to the query. ``None`` and
            pandas missing values (NaN) both mean "no artist filter".

    Returns:
        The ``preview_url`` entry of the first search result, or ``None``
        when the search returns no usable result. The entry itself may also
        be ``None``.
    """
    info = {"track": song_name}
    # A NaN artist (e.g. an empty DataFrame cell) is not None, so without this
    # check it would be searched for as the literal text "nan".
    if artist is not None and not pd.isna(artist):
        info["artist"] = artist

    # NOTE(review): Spotify's documented field-filter examples are written as
    # "track:NAME" with no space after the colon - confirm that the
    # "track: NAME" form built here is treated as a filter.
    query = " ".join(f"{k}: {v}" for k, v in info.items())
    results = spotify.search(query, type="track", limit=1)["tracks"]["items"]

    if not results or results[0] is None:
        return None
    return results[0].get("preview_url")
def patch_missing_songs(
    df: pd.DataFrame,
    after: int = 18418,
) -> pd.DataFrame:
    """Fill in missing ``Sample`` preview URLs using Spotify search results.

    A row counts as missing a preview when its ``Sample`` value is NaN or the
    placeholder ``"."``. For each such row the ``Title`` and ``Artist``
    columns are passed to ``get_song_preview_url``; when a URL comes back it
    replaces the row's ``Sample`` value.

    Args:
        df: Frame with ``Title``, ``Artist`` and ``Sample`` columns. It is
            updated in place.
        after: Number of leading missing-preview rows to skip (a positional
            offset into the filtered rows). The default keeps the value that
            was previously hard-coded - presumably a resume point from an
            earlier interrupted run; pass ``0`` to process every missing row.

    Returns:
        The same ``df`` object, after the in-place update.
    """
    spotify = spotipy.Spotify(auth_manager=SpotifyClientCredentials())

    # find songs with missing previews ("." is a placeholder for "no sample")
    audio_urls = df["Sample"].replace(".", np.nan)
    missing_df = df[pd.isna(audio_urls)].iloc[after:]

    patched_rows = []
    for _, row in tqdm(missing_df.iterrows(), total=len(missing_df)):
        preview_url = get_song_preview_url(row["Title"], spotify, row["Artist"])
        if preview_url is not None:
            row["Sample"] = preview_url
        patched_rows.append(row)

    # Re-use the original index labels so df.update aligns each patched row
    # with the row it came from.
    patched_df = pd.DataFrame(patched_rows, index=missing_df.index)
    df.update(patched_df)
    return df
def download_links_from_backup(backup_file: str, output_dir: str, delay: float = 5e-3):
    """Download every song whose link is recorded in a backup file.

    Each line of ``backup_file`` is split on commas and its second field is
    taken as the candidate link. Only candidates containing ``"https"`` are
    passed to ``download_song``.

    Args:
        backup_file: Path of the comma-separated backup file to read.
        output_dir: Destination forwarded to ``download_song``.
        delay: Seconds to pause after each download.
    """
    links = []
    with open(backup_file) as f:
        for line in f:
            fields = line.split(",")
            # Blank or comma-less lines have no second field; skip them
            # instead of failing with IndexError.
            if len(fields) < 2:
                continue
            link = fields[1].strip()
            if "https" in link:
                links.append(link)

    for link in tqdm(links, "Songs Downloaded"):
        download_song(link, output_dir)
        # Short pause between requests to lower the chance of rate limiting.
        time.sleep(delay)