File size: 1,060 Bytes
207cddf e3c7b5a 207cddf e9d1a5a e3c7b5a 207cddf 642d911 e3c7b5a 370afc1 e9d1a5a e3c7b5a 370afc1 e3c7b5a 370afc1 e3c7b5a e9d1a5a e3c7b5a 370afc1 207cddf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
"""
will do this when I need to.
Is it absolutely necessary to keep track of idioms separately?
"""
import os
import wandb
from idiomify.fetchers import fetch_literal2idiomatic, fetch_config
from idiomify.paths import ROOT_DIR
def main():
config = fetch_config()['idioms']
train_df, _ = fetch_literal2idiomatic(config['ver'])
idioms_df = train_df[['Idiom', "Sense"]]
idioms_df = idioms_df.groupby('Idiom').agg({'Sense': lambda x: list(set(x))})
with wandb.init(entity="eubinecto", project="idiomify") as run:
# the paths to write datasets in
tsv_path = ROOT_DIR / "all.tsv"
idioms_df.to_csv(tsv_path, sep="\t")
artifact = wandb.Artifact(name="idioms", type="dataset", description=config['description'],
metadata=config)
artifact.add_file(tsv_path)
# then, we just log them here.
run.log_artifact(artifact, aliases=["latest", config['ver']])
# don't forget to remove them
os.remove(tsv_path)
if __name__ == '__main__':
main()
|