asoria HF staff committed on
Commit
e205424
1 Parent(s): e77bff1

Minor details

Browse files
Files changed (2) hide show
  1. app.py +7 -3
  2. notebooks/sft.json +24 -4
app.py CHANGED
@@ -212,15 +212,19 @@ with gr.Blocks(css=css) as demo:
212
  examples=[
213
  [
214
  "scikit-learn/iris",
215
- "Try this dataset for Exploratory Data Analysis",
216
  ],
217
  [
218
  "infinite-dataset-hub/GlobaleCuisineRecipes",
219
- "Try this dataset for Embeddings generation",
220
  ],
221
  [
222
  "infinite-dataset-hub/GlobalBestSellersSummaries",
223
- "Try this dataset for RAG generation",
 
 
 
 
224
  ],
225
  ],
226
  inputs=[dataset_name, text_input],
 
212
  examples=[
213
  [
214
  "scikit-learn/iris",
215
+ "Try this dataset for Exploratory Data Analysis (EDA)",
216
  ],
217
  [
218
  "infinite-dataset-hub/GlobaleCuisineRecipes",
219
+ "Try this dataset for Text Embeddings",
220
  ],
221
  [
222
  "infinite-dataset-hub/GlobalBestSellersSummaries",
223
+ "Try this dataset for Retrieval-augmented generation (RAG)",
224
+ ],
225
+ [
226
+ "asoria/english-quotes-text",
227
+ "Try this dataset for Supervised fine-tuning (SFT)",
228
  ],
229
  ],
230
  inputs=[dataset_name, text_input],
notebooks/sft.json CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "cell_type": "code",
17
- "source": "# Install and import necessary libraries.\n!pip install trl datasets transformers bitsandbytes"
18
  },
19
  {
20
  "cell_type": "code",
@@ -24,6 +24,10 @@
24
  "cell_type": "code",
25
  "source": "# Load the dataset\ndataset = load_dataset('{dataset_name}', name='{first_config}', split='{first_split}')\ndataset"
26
  },
 
 
 
 
27
  {
28
  "cell_type": "code",
29
  "source": "# Specify the column name that will be used for training\ndataset_text_field = '{longest_col}'"
@@ -34,15 +38,31 @@
34
  },
35
  {
36
  "cell_type": "code",
37
- "source": "model_name = 'facebook/opt-350m'\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')\n\ntrainer = SFTTrainer(\n model = model_name,\n train_dataset=dataset,\n dataset_text_field=dataset_text_field,\n max_seq_length=512,\n args=TrainingArguments(\n per_device_train_batch_size = 1, #Batch size per GPU for training\n gradient_accumulation_steps = 4,\n max_steps = 100, #Total number of training steps.(Overrides epochs)\n learning_rate = 2e-4,\n fp16 = True,\n logging_steps=20,\n output_dir = output_model_name,\n optim = 'paged_adamw_8bit' #Optimizer to use\n )\n)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  },
39
  {
40
  "cell_type": "code",
41
- "source": "# Start training\ntrainer.train()"
42
  },
43
  {
44
  "cell_type": "markdown",
45
- "source": "## 3. Push model to hub"
46
  },
47
  {
48
  "cell_type": "code",
 
14
  },
15
  {
16
  "cell_type": "code",
17
+ "source": "# Install and import necessary libraries\n!pip install trl datasets transformers bitsandbytes"
18
  },
19
  {
20
  "cell_type": "code",
 
24
  "cell_type": "code",
25
  "source": "# Load the dataset\ndataset = load_dataset('{dataset_name}', name='{first_config}', split='{first_split}')\ndataset"
26
  },
27
+ {
28
+ "cell_type": "code",
29
+ "source": "# Split the dataset: 20% for evaluation, 80% for training\ntrain_test_split = dataset.train_test_split(test_size=0.2)\n\n# Get the training and evaluation datasets\ntrain_dataset = train_test_split['train']\neval_dataset = train_test_split['test']"
30
+ },
31
  {
32
  "cell_type": "code",
33
  "source": "# Specify the column name that will be used for training\ndataset_text_field = '{longest_col}'"
 
38
  },
39
  {
40
  "cell_type": "code",
41
+ "source": "model_name = 'facebook/opt-350m' # Replace with your desired model\noutput_model_name = f'{model_name}-{dataset_name}'.replace('/', '-')"
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "source": "# Initialize training arguments, adjust parameters as needed\ntraining_args = TrainingArguments(\n per_device_train_batch_size = 1, #Batch size per GPU for training\n gradient_accumulation_steps = 4,\n max_steps = 100, #Total number of training steps.(Overrides epochs)\n learning_rate = 2e-4,\n fp16 = True,\n logging_steps=20,\n output_dir = output_model_name,\n optim = 'paged_adamw_8bit' #Optimizer to use\n )"
46
+ },
47
+ {
48
+ "cell_type": "code",
49
+ "source": "# Initialize SFTTrainer\ntrainer = SFTTrainer(\n model = model_name,\n train_dataset=train_dataset,\n eval_dataset=eval_dataset,\n dataset_text_field=dataset_text_field,\n max_seq_length=512,\n args=training_args\n)"
50
+ },
51
+ {
52
+ "cell_type": "markdown",
53
+ "source": "## 3. Perform fine-tuning and capture the training process"
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "source": "eval_result_before = trainer.evaluate()\n\n# Start training\ntrainer.train()\n\neval_result_after = trainer.evaluate()"
58
  },
59
  {
60
  "cell_type": "code",
61
+ "source": "print(f'Before training: {eval_result_before}')\nprint(f'After training: {eval_result_after}')"
62
  },
63
  {
64
  "cell_type": "markdown",
65
+ "source": "## 4. Push model to hub (Optional)"
66
  },
67
  {
68
  "cell_type": "code",