import os

import yaml

import fiftyone as fo
import fiftyone.utils.random as four
import fiftyone.utils.huggingface as fouh

# Ultralytics provides the YOLO wrapper used in train_model() below
from ultralytics import YOLO


def shuffle_data(dataset):
    """Shuffle the dataset"""
    return dataset.shuffle(seed=51)


def take_random_sample(dataset):
    """Take a sample from the dataset"""
    return dataset.take(size=10, seed=51)
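

# Both helpers above return FiftyOne views rather than modified datasets, so they
# compose like any other view stages. A minimal sketch of previewing the curation
# step (illustrative only; `_preview_curation` is a hypothetical helper that the
# training pipeline never calls):
def _preview_curation(dataset):
    """Chain the shuffle and sampling stages and report the resulting view size."""
    view = take_random_sample(shuffle_data(dataset))
    print(f"Curated view contains {view.count()} samples")
    return view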


training_config = {
    "train_split": 0.9,
    "val_split": 0.1,
    "train_params": {
        "epochs": 1,
        "batch": 16,
        "imgsz": 640,
        "lr0": 0.01,
        "lrf": 0.01,
    },
}
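

# The `yaml` import above can be used to snapshot the exact hyperparameters a run
# used. A minimal sketch, assuming a scratch file name of "training_config.yaml"
# (hypothetical; nothing in the pipeline calls this function):
def _dump_training_config(config=training_config, path="training_config.yaml"):
    """Write the training configuration to a YAML file for record keeping."""
    with open(path, "w") as f:
        yaml.safe_dump(config, f, default_flow_style=False)
    return path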


def prepare_dataset():
    """
    Prepare the dataset for model training.

    NOTE: There are lines in this function that you must not modify. They are marked with "DO NOT MODIFY".

    Args:
        name (str): The name of the dataset to load. Must be "Voxel51/Data-Centric-Visual-AI-Challenge-Train-Set".

    Returns:
        fiftyone.core.dataset.Dataset: The curated dataset.

    Note:
        The following code block MUST NOT be removed from your submission:

        This ensures that only the approved dataset is used for the competition.
    """
    dataset = fouh.load_from_hub("/tmp/data/train")

    dataset = shuffle_data(dataset)
    dataset = take_random_sample(dataset)

    curated_dataset = dataset.clone(name="curated_dataset")
    curated_dataset.persistent = True

    return curated_dataset
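

# Because the curated dataset is cloned under a fixed name and marked persistent,
# it survives the current session and can be reloaded for inspection. A minimal
# sketch (illustrative only; the pipeline itself never calls this):
def _check_curated_dataset(name="curated_dataset"):
    """Reload the persisted curated dataset, if it exists, and print its size."""
    if name in fo.list_datasets():
        curated = fo.load_dataset(name)
        print(f"'{name}': persistent={curated.persistent}, {len(curated)} samples")
        return curated
    print(f"No dataset named '{name}' found")
    return None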


def export_to_yolo_format(
    samples,
    classes,
    label_field="ground_truth",
    export_dir=".",
    splits=["train", "val"]
):
    """
    Export samples to YOLO format, optionally handling multiple data splits.

    NOTE: DO NOT MODIFY THIS FUNCTION.

    Args:
        samples (fiftyone.core.collections.SampleCollection): The dataset or samples to export.
        export_dir (str): The directory where the exported data will be saved.
        classes (list): A list of class names for the YOLO format.
        label_field (str, optional): The field in the samples that contains the labels.
            Defaults to "ground_truth".
        splits (str, list, optional): The split(s) to export. Can be a single split name (str)
            or a list of split names. If None, all samples are exported as the "val" split.
            Defaults to ["train", "val"].

    Returns:
        None
    """
    if splits is None:
        splits = ["val"]
    elif isinstance(splits, str):
        splits = [splits]

    for split in splits:
        # When exporting a single unsplit collection, use all samples as "val";
        # otherwise select only the samples tagged with the current split
        split_view = samples if split == "val" and splits == ["val"] else samples.match_tags(split)

        split_view.export(
            export_dir=export_dir,
            dataset_type=fo.types.YOLOv5Dataset,
            label_field=label_field,
            classes=classes,
            split=split,
        )
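

# A note on the layout the export above produces: FiftyOne's YOLOv5Dataset
# exporter writes the standard YOLOv5 structure, i.e. `images/<split>/` and
# `labels/<split>/` directories plus a `dataset.yaml` index in `export_dir`.
# With the default `export_dir="."`, that `dataset.yaml` lands in the working
# directory, which is the file `train_model()` below passes to Ultralytics.
# A hypothetical standalone call (not part of the pipeline) might look like:
#
#     curated = fo.load_dataset("curated_dataset")
#     export_to_yolo_format(
#         samples=curated,
#         classes=curated.default_classes,
#         export_dir="/tmp/yolo_export",  # assumed scratch directory
#     )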


def train_model(training_config=training_config):
    """
    Train the YOLO model on the given dataset using the provided configuration.

    NOTE: DO NOT MODIFY THIS FUNCTION AT ALL OR YOUR SCRIPT WILL FAIL.
    """
    training_dataset = prepare_dataset()

    print("Splitting the dataset...")

    four.random_split(training_dataset, {"train": training_config['train_split'], "val": training_config['val_split']})

    print("Dataset split completed.")

    print("Exporting dataset to YOLO format...")

    export_to_yolo_format(
        samples=training_dataset,
        classes=training_dataset.default_classes,
    )

    print("Dataset export completed.")

    print("Initializing the YOLO model...")

    model = YOLO(
        model="/tmp/data/yolo11m.pt",
    )

    print("Model initialized.")

    print("Starting model training...")

    results = model.train(
        data="dataset.yaml",
        **training_config['train_params']
    )

    print("Model training completed.")

    best_model_path = str(results.save_dir / "weights/best.pt")

    print(f"Best model saved to: {best_model_path}")


if __name__ == "__main__":
    train_model()
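
    # After training, Ultralytics saves the best checkpoint at
    # `<results.save_dir>/weights/best.pt`, the path train_model() prints.
    # A hypothetical follow-up inference step (not part of this submission)
    # could reload that checkpoint; the `source` path below is an assumed
    # placeholder:
    #
    #     model = YOLO("<best model path printed above>")
    #     model.predict(source="path/to/images", imgsz=640)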