bstraehle committed on
Commit 5e0038e
1 Parent(s): 9a6d8ee

Update app.py

Files changed (1)
app.py  +11 -2
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 import os, torch
 from datasets import load_dataset
-from huggingface_hub import push_to_hub
+from huggingface_hub import Repository
 from transformers import AutoModelForCausalLM, AutoTokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments, pipeline
 
 ACTION_1 = "Prompt base model"
@@ -28,6 +28,7 @@ def process(action, base_model_name, ft_model_name, dataset_name, system_prompt,
 
 def fine_tune_model(base_model_name, dataset_name):
     # Load dataset
+
     dataset = load_dataset(dataset_name)
 
     print("### Dataset")
@@ -37,6 +38,7 @@ def fine_tune_model(base_model_name, dataset_name):
     print("###")
 
     # Load model
+
     model, tokenizer = load_model(base_model_name)
 
     print("### Model")
@@ -60,6 +62,7 @@ def fine_tune_model(base_model_name, dataset_name):
     print("###")
 
     # Split dataset into training and validation sets
+
     #train_dataset = dataset["train"]
     #test_dataset = dataset["test"]
     train_dataset = dataset["train"].shuffle(seed=42).select(range(1000))
@@ -72,6 +75,7 @@ def fine_tune_model(base_model_name, dataset_name):
     print("###")
 
     # Configure training arguments
+
     training_args = Seq2SeqTrainingArguments(
         output_dir="./output",
         logging_dir="./logging",
@@ -97,6 +101,7 @@ def fine_tune_model(base_model_name, dataset_name):
     print("###")
 
     # Create trainer
+
     trainer = Seq2SeqTrainer(
         model=model,
         args=training_args,
@@ -106,16 +111,20 @@ def fine_tune_model(base_model_name, dataset_name):
     )
 
     # Train model
+
     #trainer.train()
 
     # Save model to HF
-    push_to_hub(
+
+    repo = Repository(
         local_dir="./output",
         repo_id=FT_MODEL_NAME,
         repo_type="model",
         use_auth_token=True,
     )
 
+    repo.push_to_hub()
+
 def prompt_model(model_name, system_prompt, user_prompt, sql_context):
     pipe = pipeline("text-generation",
                     model=model_name,
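
For context, a minimal sketch of the Repository-based upload flow this commit switches to, assuming the fine-tuned model has been saved under ./output and that FT_MODEL_NAME names an existing model repo on the Hub. Note that Repository identifies the target repo through its clone_from parameter rather than a repo_id keyword, and expects local_dir to be (or become) a git clone of that repo:

from huggingface_hub import Repository

# Hypothetical repo id; FT_MODEL_NAME in app.py plays this role.
FT_MODEL_NAME = "user/fine-tuned-model"

# Clone the Hub repo into local_dir (or reuse an existing clone there).
repo = Repository(
    local_dir="./output",
    clone_from=FT_MODEL_NAME,
    repo_type="model",
    use_auth_token=True,  # picks up the token from `huggingface-cli login`
)

# Save the fine-tuned weights and tokenizer into the clone before pushing, e.g.:
# model.save_pretrained("./output")
# tokenizer.save_pretrained("./output")

# Commit everything in local_dir and push it to the Hub.
repo.push_to_hub(commit_message="Add fine-tuned model")

On current huggingface_hub releases, Repository is deprecated; HfApi().upload_folder(folder_path="./output", repo_id=FT_MODEL_NAME, repo_type="model") uploads the same directory without needing a local git clone.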