Spaces · Runtime error

waidhoferj committed · Commit c914273 · 0 Parent(s)

first commit
Browse files
- .gitattributes +1 -0
- .gitignore +5 -0
- app.py +107 -0
- assets/song-samples/alejandro.wav +3 -0
- assets/song-samples/exs_and_ohs.wav +3 -0
- assets/song-samples/take_it_to_the_limit.wav +3 -0
- dancer_net/dancer_net.py +85 -0
- environment.yml +20 -0
- main.py +46 -0
- preprocessing/dataset.py +49 -0
- preprocessing/preprocess.py +104 -0
- scrapers/music4dance.py +113 -0
- train.py +215 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
*.wav filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,5 @@
__pycache__
.DS_Store
data
logs
gradio_cached_examples
app.py
ADDED
@@ -0,0 +1,107 @@
from pathlib import Path
import gradio as gr
import numpy as np
import torch
from preprocessing.preprocess import AudioPipeline
from dancer_net.dancer_net import ShortChunkCNN
import os
import json
from functools import cache
import pandas as pd

@cache
def get_model(device) -> tuple[ShortChunkCNN, np.ndarray]:
    model_path = "logs/20221226-230930"
    weights = os.path.join(model_path, "dancer_net.pt")
    config_path = os.path.join(model_path, "config.json")

    with open(config_path) as f:
        config = json.load(f)
    labels = np.array(sorted(config["classes"]))

    model = ShortChunkCNN(n_class=len(labels))
    model.load_state_dict(torch.load(weights))
    model = model.to(device).eval()
    return model, labels

@cache
def get_pipeline(sample_rate: int) -> AudioPipeline:
    return AudioPipeline(input_freq=sample_rate)

@cache
def get_dance_map() -> dict:
    df = pd.read_csv("data/dance_mapping.csv")
    return df.set_index("id").to_dict()["name"]


def predict(audio: tuple[int, np.ndarray]) -> list[str]:
    sample_rate, waveform = audio

    expected_duration = 6
    threshold = 0.5
    sample_len = sample_rate * expected_duration
    device = "mps"

    audio_pipeline = get_pipeline(sample_rate)
    model, labels = get_model(device)

    if sample_len > len(waveform):
        raise gr.Error("You must record for at least 6 seconds")
    if len(waveform.shape) > 1 and waveform.shape[1] > 1:
        waveform = waveform.transpose(1, 0)
        waveform = waveform.mean(axis=0, keepdims=True)
    else:
        waveform = np.expand_dims(waveform, 0)
    waveform = waveform[:, :sample_len]
    waveform = (waveform - waveform.min()) / (waveform.max() - waveform.min()) * 2 - 1
    waveform = waveform.astype("float32")
    waveform = torch.from_numpy(waveform)
    spectrogram = audio_pipeline(waveform)
    spectrogram = spectrogram.unsqueeze(0).to(device)

    with torch.no_grad():
        results = model(spectrogram)
    dance_mapping = get_dance_map()
    results = results.squeeze(0).detach().cpu().numpy()
    result_mask = results > threshold
    probs = results[result_mask]
    dances = labels[result_mask]

    return {dance_mapping[dance_id]: float(prob) for dance_id, prob in zip(dances, probs)} if len(dances) else "Couldn't find a dance."


def demo():
    title = "Dance Classifier"
    description = "Record 6 seconds of a song and find out what dance fits the music."
    with gr.Blocks() as app:
        gr.Markdown(f"# {title}")
        gr.Markdown(description)
        with gr.Tab("Record Song"):
            mic_audio = gr.Audio(source="microphone", label="Song Recording")
            mic_submit = gr.Button("Predict")

        with gr.Tab("Upload Song") as t:
            audio_file = gr.Audio(label="Song Audio File")
            audio_file_submit = gr.Button("Predict")
            song_samples = Path(os.path.dirname(__file__), "assets", "song-samples")
            example_audio = [str(song) for song in song_samples.iterdir() if song.name[0] != '.']

        labels = gr.Label(label="Dances")

        gr.Markdown("## Examples")
        gr.Examples(
            examples=example_audio,
            inputs=audio_file,
            outputs=labels,
            fn=predict,
        )

        audio_file_submit.click(fn=predict, inputs=audio_file, outputs=labels)
        mic_submit.click(fn=predict, inputs=mic_audio, outputs=labels)

    return app


if __name__ == "__main__":
    demo().launch()
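A quick smoke test for predict() (not part of this commit), assuming the checkpoint under logs/20221226-230930 and data/dance_mapping.csv exist and an MPS device is available; it mimics the (sample_rate, waveform) tuple Gradio passes in:

import numpy as np
from app import predict

# ~7 seconds of random noise at 16 kHz, int16 like a Gradio microphone recording
sr = 16000
waveform = np.random.randint(-32768, 32767, sr * 7, dtype=np.int16)
print(predict((sr, waveform)))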
assets/song-samples/alejandro.wav
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:85f9a65fc4adb1fc0cbdbfafb7f7268a0934d97a120110d3f3a43375e59cba54
size 5292078
assets/song-samples/exs_and_ohs.wav
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e53fe157ff687b5464e98c7d0c03d0712527c3a7ed24b6b063a328fcf7bf608
size 5292082
assets/song-samples/take_it_to_the_limit.wav
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c69e0eeb4321c44daaaaf95dd596b1d813b9f7e9b5ef4ac5ae9fe11878d4b13b
size 5292082
dancer_net/dancer_net.py
ADDED
@@ -0,0 +1,85 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchaudio import transforms as taT, functional as taF


DEVICE = "mps"

class ShortChunkCNN(nn.Module):
    def __init__(self,
                 n_channels=128,
                 sample_rate=16000,
                 n_class=50):
        super().__init__()

        # Spectrogram
        self.spec_bn = nn.BatchNorm2d(1)

        # CNN
        self.res_layers = nn.Sequential(
            Res_2d(1, n_channels, stride=2),
            Res_2d(n_channels, n_channels, stride=2),
            Res_2d(n_channels, n_channels*2, stride=2),
            Res_2d(n_channels*2, n_channels*2, stride=2),
            Res_2d(n_channels*2, n_channels*2, stride=2),
            Res_2d(n_channels*2, n_channels*2, stride=2),
            Res_2d(n_channels*2, n_channels*4, stride=2)
        )

        # Dense
        self.dense1 = nn.Linear(n_channels*4, n_channels*4)
        self.bn = nn.BatchNorm1d(n_channels*4)
        self.dense2 = nn.Linear(n_channels*4, n_class)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = self.spec_bn(x)

        # CNN
        x = self.res_layers(x)
        x = x.squeeze(2)

        # Global Max Pooling
        if x.size(-1) != 1:
            x = nn.MaxPool1d(x.size(-1))(x)
        x = x.squeeze(2)

        # Dense
        x = self.dense1(x)
        x = self.bn(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.dense2(x)
        x = nn.Sigmoid()(x)

        return x


class Res_2d(nn.Module):
    def __init__(self, input_channels, output_channels, shape=3, stride=2):
        super().__init__()
        # convolution
        self.conv_1 = nn.Conv2d(input_channels, output_channels, shape, stride=stride, padding=shape//2)
        self.bn_1 = nn.BatchNorm2d(output_channels)
        self.conv_2 = nn.Conv2d(output_channels, output_channels, shape, padding=shape//2)
        self.bn_2 = nn.BatchNorm2d(output_channels)

        # residual
        self.diff = False
        if (stride != 1) or (input_channels != output_channels):
            self.conv_3 = nn.Conv2d(input_channels, output_channels, shape, stride=stride, padding=shape//2)
            self.bn_3 = nn.BatchNorm2d(output_channels)
            self.diff = True
        self.relu = nn.ReLU()

    def forward(self, x):
        # convolution
        out = self.bn_2(self.conv_2(self.relu(self.bn_1(self.conv_1(x)))))

        # residual
        if self.diff:
            x = self.bn_3(self.conv_3(x))
        out = x + out
        out = self.relu(out)
        return out
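A shape sanity check for ShortChunkCNN (not part of this commit); the dummy input imitates a batch of mono 64-band mel-spectrograms, roughly what AudioPipeline produces for a 6-second clip at 16 kHz:

import torch
from dancer_net.dancer_net import ShortChunkCNN

model = ShortChunkCNN(n_class=20).eval()
x = torch.randn(2, 1, 64, 188)  # (batch, channel, n_mels, time frames)
with torch.no_grad():
    out = model(x)
print(out.shape)  # torch.Size([2, 20]): one sigmoid score per dance class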
environment.yml
ADDED
@@ -0,0 +1,20 @@
name: dancer-net
channels:
  - anaconda
  - conda-forge
dependencies:
  - torchvision
  - pytorch
  - numpy
  - pandas
  - seaborn
  - python=3.10
  - matplotlib
  - torchaudio
  - bs4
  - requests
  - bidict
  - tqdm
  - pip
  - gradio
prefix: /opt/homebrew/Caskroom/miniforge/base/envs/dancer-net
main.py
ADDED
@@ -0,0 +1,46 @@
import torchaudio
from preprocessing.preprocess import AudioPipeline
from dancer_net.dancer_net import ShortChunkCNN
import torch
import numpy as np
import os
import json

if __name__ == "__main__":

    audio_file = "data/samples/mzm.iqskzxzx.aac.p.m4a.wav"
    seconds = 6
    model_path = "logs/20221226-230930"
    weights = os.path.join(model_path, "dancer_net.pt")
    config_path = os.path.join(model_path, "config.json")
    device = "mps"
    threshold = 0.5

    with open(config_path) as f:
        config = json.load(f)
    labels = np.array(sorted(config["classes"]))

    audio_pipeline = AudioPipeline()
    waveform, sample_rate = torchaudio.load(audio_file)
    waveform = waveform[:, :seconds * sample_rate]
    spectrogram = audio_pipeline(waveform)
    spectrogram = spectrogram.unsqueeze(0).to(device)

    model = ShortChunkCNN(n_class=len(labels))
    model.load_state_dict(torch.load(weights))
    model = model.to(device).eval()

    with torch.no_grad():
        results = model(spectrogram)
    results = results.squeeze(0).detach().cpu().numpy()
    results = results > threshold
    results = labels[results]
    print(results)
preprocessing/dataset.py
ADDED
@@ -0,0 +1,49 @@
import torch
from torch.utils.data import Dataset
import numpy as np
import torchaudio as ta
from .preprocess import AudioPipeline


class SongDataset(Dataset):
    def __init__(self,
                 audio_paths: list[str],
                 dance_labels: list[np.ndarray],
                 audio_duration=30,  # seconds
                 audio_window_duration=6,  # seconds
                 ):
        assert audio_duration % audio_window_duration == 0, "Audio window should divide duration evenly."

        self.audio_paths = audio_paths
        self.dance_labels = dance_labels
        audio_info = ta.info(audio_paths[0])
        self.sample_rate = audio_info.sample_rate
        self.audio_window_duration = int(audio_window_duration)
        self.audio_duration = int(audio_duration)

        self.audio_pipeline = AudioPipeline(input_freq=self.sample_rate)

    def __len__(self):
        return len(self.audio_paths) * self.audio_duration // self.audio_window_duration

    def __getitem__(self, idx) -> tuple[torch.Tensor, torch.Tensor]:
        waveform = self._waveform_from_index(idx)
        spectrogram = self.audio_pipeline(waveform)

        dance_labels = self._label_from_index(idx)

        return spectrogram, dance_labels


    def _waveform_from_index(self, idx: int) -> torch.Tensor:
        audio_file_idx = idx * self.audio_window_duration // self.audio_duration
        frame_offset = idx % self.audio_duration // self.audio_window_duration
        num_frames = self.sample_rate * self.audio_window_duration
        waveform, sample_rate = ta.load(self.audio_paths[audio_file_idx], frame_offset=frame_offset, num_frames=num_frames)
        assert sample_rate == self.sample_rate, f"Expected sample rate of {self.sample_rate}. Found {sample_rate}"
        return waveform


    def _label_from_index(self, idx: int) -> torch.Tensor:
        label_idx = idx * self.audio_window_duration // self.audio_duration
        return torch.from_numpy(self.dance_labels[label_idx])
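The window arithmetic above can be checked in isolation (made-up values, not part of this commit): with the default audio_duration=30 and audio_window_duration=6, each song contributes 30 // 6 = 5 windows, and item idx maps to song idx * 6 // 30.

# indices 0-4 -> song 0, 5-9 -> song 1, 10-11 -> song 2
for idx in range(12):
    print(idx, "-> song", idx * 6 // 30)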
preprocessing/preprocess.py
ADDED
@@ -0,0 +1,104 @@
import pandas as pd
import numpy as np
import re
import json
from pathlib import Path
import os
import torch
import torchaudio.transforms as taT

def url_to_filename(url: str) -> str:
    return f"{url.split('/')[-1]}.wav"

def get_songs_with_audio(df: pd.DataFrame, audio_dir: str) -> pd.DataFrame:
    audio_urls = df["Sample"].replace(".", np.nan)
    audio_files = set(os.path.basename(f) for f in Path(audio_dir).iterdir())
    valid_audio = audio_urls.apply(lambda url: url is not np.nan and url_to_filename(url) in audio_files)
    df = df[valid_audio]
    return df

def fix_dance_rating_counts(dance_ratings: pd.Series) -> pd.Series:
    tag_pattern = re.compile(r"([A-Za-z]+)(\+|-)(\d+)")
    dance_ratings = dance_ratings.apply(lambda v: json.loads(v.replace("'", "\"")))
    def fix_labels(labels: dict) -> dict | float:
        new_labels = {}
        for k, v in labels.items():
            match = tag_pattern.search(k)
            if match is None:
                new_labels[k] = new_labels.get(k, 0) + v
            else:
                k = match[1]
                sign = 1 if match[2] == '+' else -1
                scale = int(match[3])
                new_labels[k] = new_labels.get(k, 0) + v * scale * sign
        valid = any(v > 0 for v in new_labels.values())
        return new_labels if valid else np.nan
    return dance_ratings.apply(fix_labels)


def get_unique_labels(dance_labels: pd.Series) -> list:
    labels = set()
    for dances in dance_labels:
        labels |= set(dances)
    return sorted(labels)

def vectorize_label_probs(labels: dict[str, int], unique_labels: np.ndarray) -> np.ndarray:
    """
    Turns label dict into probability distribution vector based on each label count.
    """
    label_vec = np.zeros((len(unique_labels),), dtype="float32")
    for k, v in labels.items():
        item_vec = (unique_labels == k) * v
        label_vec += item_vec
    lv_cache = label_vec.copy()
    label_vec[label_vec < 0] = 0
    label_vec /= label_vec.sum()
    assert not any(np.isnan(label_vec)), f"Provided labels are invalid: {labels}"
    return label_vec

def vectorize_multi_label(labels: dict[str, int], unique_labels: np.ndarray) -> np.ndarray:
    """
    Turns label dict into binary label vectors for multi-label classification.
    """
    probs = vectorize_label_probs(labels, unique_labels)
    probs[probs > 0.0] = 1.0
    return probs

def get_examples(df: pd.DataFrame, audio_dir: str, class_list=None) -> tuple[list[str], list[np.ndarray]]:
    sampled_songs = get_songs_with_audio(df, audio_dir)
    sampled_songs.loc[:, "DanceRating"] = fix_dance_rating_counts(sampled_songs["DanceRating"])
    if class_list is not None:
        class_list = set(class_list)
        sampled_songs.loc[:, "DanceRating"] = sampled_songs["DanceRating"].apply(
            lambda labels: {k: v for k, v in labels.items() if k in class_list}
            if not pd.isna(labels) and any(label in class_list and amt > 0 for label, amt in labels.items())
            else np.nan)
    sampled_songs = sampled_songs.dropna(subset=["DanceRating"])
    labels = sampled_songs["DanceRating"]
    unique_labels = np.array(get_unique_labels(labels))
    labels = labels.apply(lambda i: vectorize_multi_label(i, unique_labels))

    audio_paths = [os.path.join(audio_dir, url_to_filename(url)) for url in sampled_songs["Sample"]]

    return audio_paths, list(labels)

class AudioPipeline(torch.nn.Module):
    def __init__(
        self,
        input_freq=16000,
        resample_freq=16000,
    ):
        super().__init__()
        self.resample = taT.Resample(orig_freq=input_freq, new_freq=resample_freq)
        self.spec = taT.MelSpectrogram(sample_rate=resample_freq, n_mels=64, n_fft=1024)
        self.to_db = taT.AmplitudeToDB()

    def forward(self, waveform: torch.Tensor) -> torch.Tensor:
        if waveform.shape[0] > 1:
            waveform = waveform.mean(0, keepdim=True)
        waveform = self.resample(waveform)
        spectrogram = self.spec(waveform)
        spectrogram = self.to_db(spectrogram)

        return spectrogram
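A small worked example of the label vectorization helpers (made-up counts, not part of this commit); negative vote totals are clipped before normalizing, and the multi-label variant binarizes whatever remains:

import numpy as np
from preprocessing.preprocess import vectorize_label_probs, vectorize_multi_label

unique = np.array(["CHA", "SWZ", "TGO"])
print(vectorize_label_probs({"CHA": 3, "SWZ": 1, "TGO": -2}, unique))  # [0.75 0.25 0.  ]
print(vectorize_multi_label({"CHA": 3, "SWZ": 1, "TGO": -2}, unique))  # [1. 1. 0.]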
scrapers/music4dance.py
ADDED
@@ -0,0 +1,113 @@
import requests
from bs4 import BeautifulSoup as bs
import json
import argparse
from pathlib import Path
import os
import pandas as pd
import re
from tqdm import tqdm


def scrape_song_library(page_count=2054) -> pd.DataFrame:
    columns = [
        "Title",
        "Artist",
        "Length",
        "Tempo",
        "Beat",
        "Energy",
        "Danceability",
        "Valence",
        "Sample",
        "Tags",
        "DanceRating",
    ]
    song_df = pd.DataFrame(columns=columns)
    for i in tqdm(range(1, page_count + 1), desc="Pages processed"):
        link = "https://www.music4dance.net/song/Index?filter=v2-Index&page=" + str(i)
        page = requests.get(link)
        soup = bs(page.content, "html.parser")
        songs = pd.DataFrame(get_songs(soup))
        song_df = pd.concat([song_df, songs], axis=0, ignore_index=True)
    return song_df


def get_songs(soup: bs) -> dict:
    js_obj = re.compile(r"{(.|\n)*}")
    reset_keys = [
        "Title",
        "Artist",
        "Length",
        "Tempo",
        "Beat",
        "Energy",
        "Danceability",
        "Valence",
        "Sample",
    ]
    song_text = [str(v) for v in soup.find_all("script") if "histories" in str(v)][0]
    songs_data = json.loads(js_obj.search(song_text).group(0))
    songs = []
    for song_data in songs_data["histories"]:
        song = {"Tags": set(), "DanceRating": {}}
        for feature in song_data["properties"]:
            if "name" not in feature or "value" not in feature:
                continue
            key = feature["name"]
            value = feature["value"]
            if key in reset_keys:
                song[key] = value
            elif key == "Tag+":
                song["Tags"].add(value)
            elif key == "DeleteTag":
                try:
                    song["Tags"].remove(value)
                except:
                    continue
            elif key == "DanceRating":
                dance = value.replace("+1", "")
                prev = song["DanceRating"].get(dance, 0)
                song["DanceRating"][dance] = prev + 1
        songs.append(song)
    return songs


def download_song(url: str, out_dir: str):
    response = requests.get(url)
    filename = url.split("/")[-1]
    out_file = Path(out_dir, f"{filename}.mp3")
    with open(out_file, "wb") as f:
        f.write(response.content)

def scrape_dance_info() -> pd.DataFrame:
    js_obj = re.compile(r"{(.|\n)*}")
    link = "https://www.music4dance.net/song/Index?filter=v2-Index"
    page = requests.get(link)
    soup = bs(page.content, "html.parser")

    dance_info_text = [str(v) for v in soup.find_all("script") if "environment" in str(v)][0]
    dance_info = json.loads(js_obj.search(dance_info_text).group(0))
    dance_info = dance_info["dances"]
    wanted_keys = ["name", "id", "synonyms", "tempoRange", "songCount"]
    dance_df = pd.DataFrame([{k: v for k, v in dance.items() if k in wanted_keys}
                             for dance
                             in dance_info])
    return dance_df


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--page-count", default=2, type=int)
    parser.add_argument("--out", default="data/song.csv")

    args = parser.parse_args()
    out_path = Path(args.out)
    out_dir = os.path.dirname(out_path)
    if not os.path.exists(out_dir):
        print(f"Output location does not exist: {out_dir}")
    df = scrape_song_library(args.page_count)
    df.to_csv(out_path)
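app.py reads a data/dance_mapping.csv with "id" and "name" columns; presumably scrape_dance_info() is how that file gets produced. A sketch under that assumption (not part of this commit):

from scrapers.music4dance import scrape_dance_info

df = scrape_dance_info()
# keep only the columns app.py's get_dance_map() expects
df[["id", "name"]].to_csv("data/dance_mapping.csv", index=False)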
train.py
ADDED
@@ -0,0 +1,215 @@
import datetime
import os
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from tqdm import tqdm
import pandas as pd
import numpy as np
from torch.utils.data import random_split, SubsetRandomSampler
import json
from sklearn.model_selection import KFold

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from preprocessing.dataset import SongDataset
from preprocessing.preprocess import get_examples
from dancer_net.dancer_net import ShortChunkCNN

DEVICE = "mps"
SEED = 42

def get_timestamp() -> str:
    return datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S")

class EarlyStopping:
    def __init__(self, patience=0):
        self.patience = patience
        self.last_measure = np.inf
        self.consecutive_increase = 0

    def step(self, val) -> bool:
        if self.last_measure <= val:
            self.consecutive_increase += 1
        else:
            self.consecutive_increase = 0
        self.last_measure = val

        return self.patience < self.consecutive_increase


def calculate_metrics(pred, target, threshold=0.5, prefix=""):
    target = target.detach().cpu().numpy()
    pred = pred.detach().cpu().numpy()
    pred = np.array(pred > threshold, dtype=float)
    metrics = {
        'precision': precision_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
        'recall': recall_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
        'f1': f1_score(y_true=target, y_pred=pred, average='macro', zero_division=0),
        'accuracy': accuracy_score(y_true=target, y_pred=pred),
    }
    if prefix != "":
        metrics = {prefix + k: v for k, v in metrics.items()}

    return metrics


def evaluate(model: nn.Module, data_loader: DataLoader, criterion, device="mps") -> pd.Series:
    val_metrics = []
    for features, labels in (prog_bar := tqdm(data_loader)):
        features = features.to(device)
        labels = labels.to(device)
        with torch.no_grad():
            outputs = model(features)
            loss = criterion(outputs, labels)
        batch_metrics = calculate_metrics(outputs, labels, prefix="val_")
        batch_metrics["val_loss"] = loss.item()
        prog_bar.set_description(f'Validation - Loss: {batch_metrics["val_loss"]:.2f}, Accuracy: {batch_metrics["val_accuracy"]:.2f}')
        val_metrics.append(batch_metrics)
    return pd.DataFrame(val_metrics).mean()


def train(
        model: nn.Module,
        data_loader: DataLoader,
        val_loader=None,
        epochs=3,
        lr=1e-3,
        device="mps"):
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    early_stop = EarlyStopping(1)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr,
                                                    steps_per_epoch=int(len(data_loader)),
                                                    epochs=epochs,
                                                    anneal_strategy='linear')
    metrics = []
    for epoch in range(1, epochs + 1):
        train_metrics = []
        prog_bar = tqdm(data_loader)
        for features, labels in prog_bar:
            features = features.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            scheduler.step()
            batch_metrics = calculate_metrics(outputs, labels)
            batch_metrics["loss"] = loss.item()
            train_metrics.append(batch_metrics)
            prog_bar.set_description(f'Training - Epoch: {epoch}/{epochs}, Loss: {batch_metrics["loss"]:.2f}, Accuracy: {batch_metrics["accuracy"]:.2f}')
        train_metrics = pd.DataFrame(train_metrics).mean()
        if val_loader is not None:
            val_metrics = evaluate(model, val_loader, criterion)
            if early_stop.step(val_metrics["val_f1"]):
                break
            epoch_metrics = pd.concat([train_metrics, val_metrics], axis=0)
        else:
            epoch_metrics = train_metrics
        metrics.append(dict(epoch_metrics))

    return model, metrics


def cross_validation(seed=42, batch_size=64, k=5, device="mps"):
    target_classes = ['ATN', 'BBA', 'BCH', 'BLU', 'CHA', 'CMB', 'CSG', 'ECS', 'HST', 'JIV',
                      'LHP', 'QST', 'RMB', 'SFT', 'SLS', 'SMB', 'SWZ', 'TGO', 'VWZ', 'WCS']
    df = pd.read_csv("data/songs.csv")
    x, y = get_examples(df, "data/samples", class_list=target_classes)

    dataset = SongDataset(x, y)
    splits = KFold(n_splits=k, shuffle=True, random_state=seed)
    metrics = []
    for fold, (train_idx, val_idx) in enumerate(splits.split(x, y)):
        print(f"Fold {fold+1}")

        train_sampler = SubsetRandomSampler(train_idx)
        test_sampler = SubsetRandomSampler(val_idx)
        train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
        test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
        n_classes = len(y[0])
        model = ShortChunkCNN(n_class=n_classes).to(device)
        model, _ = train(model, train_loader, epochs=2, device=device)
        val_metrics = evaluate(model, test_loader, nn.BCELoss())
        metrics.append(val_metrics)
    metrics = pd.DataFrame(metrics)
    log_dir = os.path.join(
        "logs", get_timestamp()
    )
    os.makedirs(log_dir, exist_ok=True)

    metrics.to_csv(os.path.join(log_dir, "cross_val.csv"))


def train_model():
    target_classes = ['ATN', 'BBA', 'BCH', 'BLU', 'CHA', 'CMB', 'CSG', 'ECS', 'HST', 'JIV',
                      'LHP', 'QST', 'RMB', 'SFT', 'SLS', 'SMB', 'SWZ', 'TGO', 'VWZ', 'WCS']
    df = pd.read_csv("data/songs.csv")
    x, y = get_examples(df, "data/samples", class_list=target_classes)
    dataset = SongDataset(x, y)
    train_count = int(len(dataset) * 0.9)
    datasets = random_split(dataset, [train_count, len(dataset) - train_count], torch.Generator().manual_seed(SEED))
    data_loaders = [DataLoader(data, batch_size=64, shuffle=True) for data in datasets]
    train_data, val_data = data_loaders
    example_spec, example_label = dataset[0]
    n_classes = len(example_label)
    model = ShortChunkCNN(n_class=n_classes).to(DEVICE)
    model, metrics = train(model, train_data, val_data, epochs=3, device=DEVICE)

    log_dir = os.path.join(
        "logs", get_timestamp()
    )
    os.makedirs(log_dir, exist_ok=True)

    torch.save(model.state_dict(), os.path.join(log_dir, "dancer_net.pt"))
    metrics = pd.DataFrame(metrics)
    metrics.to_csv(os.path.join(log_dir, "metrics.csv"))
    config = {
        "classes": target_classes
    }
    with open(os.path.join(log_dir, "config.json"), "w") as f:
        json.dump(config, f)
    print("Training information saved!")

if __name__ == "__main__":
    cross_validation()
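A toy check of calculate_metrics (made-up values, not part of this commit); predictions are thresholded at 0.5 before the sklearn metrics are computed, so both rows below match their targets and every metric comes out 1.0:

import torch
from train import calculate_metrics

pred = torch.tensor([[0.9, 0.2], [0.4, 0.8]])
target = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
print(calculate_metrics(pred, target, threshold=0.5))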