"""Embed a prompt dataset with model2vec and run a cosine-distance search in DuckDB."""

import duckdb
import polars as pl
from datasets import load_dataset
from model2vec import StaticModel

# Load a model from the HuggingFace hub (in this case the potion-base-8M model).
model_name = "minishlab/potion-base-8M"
model = StaticModel.from_pretrained(model_name)

# Make embeddings for every row of the "act" column and store them next to
# the original data.
ds = load_dataset("fka/awesome-chatgpt-prompts")
df = ds["train"].to_polars()
# Hand the encoder a plain list of strings rather than a polars Series, and
# show progress here, where many rows are encoded.
embeddings = model.encode(df["act"].to_list(), show_progress_bar=True)
df = df.with_columns(pl.Series(embeddings).alias("embeddings"))

# Embed the search query with the same model.
vector = model.encode("An Ethereum Developer")
query_values = vector.tolist()
# Take the array width from the query vector instead of hard-coding 256, so
# the cast keeps matching if `model_name` is changed to a different model.
embedding_dim = len(query_values)

# `df` is referenced by name inside the SQL text; the 10 rows with the
# smallest cosine distance to the query vector are printed.
# NOTE(review): assumes the "embeddings" column reaches DuckDB as a
# fixed-size array of the same width as the query vector - confirm.
duckdb.sql(
    query=f"""
    SELECT *
    FROM df
    ORDER BY array_cosine_distance(embeddings, {query_values}::FLOAT[{embedding_dim}])
    LIMIT 10
    """
).show()