"""Module-level configuration constants.

Defines credentials read from the environment, dataset identifiers, the LLM
model name, and local cache/output paths.

Importing this module has a side effect: the ``cache`` and ``output``
directories are created relative to the current working directory if they do
not already exist.
"""
import os
from pathlib import Path

# NOTE(review): presumably the seed shared by randomized steps -- confirm
# against the modules that consume it.
RANDOM_STATE = 42

# --- Credentials -----------------------------------------------------------
# Both tokens are read from the environment; each is None when the variable
# is not set, so consumers must handle the missing-token case.
GRAZIE_API_JWT_TOKEN = os.environ.get("GRAZIE_API_JWT_TOKEN")
HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE(review): presumably a timeout in seconds (per the name) -- confirm how
# the consumer applies it.
GRAZIE_TIMEOUT_SEC = 1.0

# --- Dataset identifiers ---------------------------------------------------
# NOTE(review): the HF_ prefix suggests Hugging Face Hub dataset names,
# configuration sub-names and splits -- confirm against the loading code.
HF_RAW_DATASET_NAME = "petrtsv-jb/commit-msg-rewriting"
HF_RAW_DATASET_SPLIT = "train"

HF_FULL_COMMITS_DATASET_NAME = "JetBrains-Research/lca-commit-message-generation"
HF_FULL_COMMITS_DATASET_SUBNAME = "commitchronicle-py-long"
HF_FULL_COMMITS_DATASET_SPLIT = "test"

HF_PREDICTIONS_DATASET_NAME = "JetBrains-Research/lca-results"
HF_PREDICTIONS_DATASET_SUBNAME = "cmg_gpt_4_0613"
HF_PREDICTIONS_DATASET_SPLIT = "test"

HF_SYNTHETIC_DATASET_NAME = "petrtsv-jb/synthetic-commit-msg-rewriting"
HF_SYNTHETIC_DATASET_SPLIT = "train"

# --- Model -----------------------------------------------------------------
LLM_MODEL = "gpt-4-1106-preview"

# --- Local directories -----------------------------------------------------
# Created at import time; exist_ok=True makes repeated imports harmless.
CACHE_DIR = Path("cache")
CACHE_DIR.mkdir(exist_ok=True)

OUTPUT_DIR = Path("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# --- Output artifacts (CSV files inside OUTPUT_DIR) --------------------------
END_TO_START_ARTIFACT = OUTPUT_DIR / "end_to_start.csv"
START_TO_END_ARTIFACT = OUTPUT_DIR / "start_to_end.csv"
SYNTHETIC_DATASET_ARTIFACT = OUTPUT_DIR / "synthetic.csv"
METRICS_CORRELATIONS_ARTIFACT = OUTPUT_DIR / "metrics_correlations.csv"
DATA_FOR_LABELING_ARTIFACT = OUTPUT_DIR / "data_for_labeling.csv"