Anthony G committed on
Commit
4b2da06
1 Parent(s): f94de18

added app.py and requirements.txt

Files changed (2)
  1. app.py +85 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,85 @@
+ import gradio as gr
+ import torch
+ import transformers
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ from peft import PeftConfig, PeftModel
+ import warnings
+
+ warnings.filterwarnings("ignore")
+
+
+ PEFT_MODEL = "givyboy/phi-2-finetuned-mental-health-conversational"
+
+ SYSTEM_PROMPT = """Answer the following question truthfully.
+ If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
+ If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""
+
+ USER_PROMPT = lambda x: f"""<HUMAN>: {x}\n<ASSISTANT>: """
+ ADD_RESPONSE = lambda x, y: f"""<HUMAN>: {x}\n<ASSISTANT>: {y}"""
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+
+ config = PeftConfig.from_pretrained(PEFT_MODEL)
+
+ peft_base_model = AutoModelForCausalLM.from_pretrained(
+     config.base_model_name_or_path,
+     return_dict=True,
+     quantization_config=bnb_config,
+     device_map="auto",
+     trust_remote_code=True,
+ )
+
+ peft_model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)
+
+ peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+ peft_tokenizer.pad_token = peft_tokenizer.eos_token
+
+ pipeline = transformers.pipeline(
+     "text-generation",
+     model=peft_model,
+     tokenizer=peft_tokenizer,
+     torch_dtype=torch.bfloat16,
+     trust_remote_code=True,
+     device_map="auto",
+ )
+
+
+ def format_message(message: str, history: list[list[str]], memory_limit: int = 3) -> str:
+     if len(history) > memory_limit:
+         history = history[-memory_limit:]
+
+     if len(history) == 0:
+         return f"{SYSTEM_PROMPT}\n{USER_PROMPT(message)}"
+
+     formatted_message = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"
+
+     for msg, ans in history[1:]:
+         formatted_message += f"\n{ADD_RESPONSE(msg, ans)}"
+
+     formatted_message += f"\n{USER_PROMPT(message)}"
+     return formatted_message
+
+
+ def get_model_response(message: str, history: list[list[str]]) -> str:
+     formatted_message = format_message(message, history)
+     sequences = pipeline(
+         formatted_message,
+         do_sample=True,
+         top_k=10,
+         num_return_sequences=1,
+         eos_token_id=peft_tokenizer.eos_token_id,
+         max_length=600,
+         truncation=True,
+     )[0]
+     print(sequences["generated_text"])
+     output = sequences["generated_text"].split("<ASSISTANT>:")[-1].strip()
+     # print(f"Response: {output}")
+     return output
+
+
+ gr.ChatInterface(fn=get_model_response).launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch
+ gradio
+ transformers
+ peft
+ bitsandbytes
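
After `pip install -r requirements.txt`, running `python app.py` launches the Gradio chat UI. For reference, the prompt string that `format_message` assembles can be inspected without loading the model: below is a minimal, self-contained sketch that copies the template strings from app.py, using a made-up one-exchange conversation purely for illustration.

SYSTEM_PROMPT = """Answer the following question truthfully.
If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

USER_PROMPT = lambda x: f"<HUMAN>: {x}\n<ASSISTANT>: "
ADD_RESPONSE = lambda x, y: f"<HUMAN>: {x}\n<ASSISTANT>: {y}"

# Hypothetical history and new message, for illustration only.
history = [["I have trouble sleeping lately.", "I'm sorry to hear that. How long has this been going on?"]]
message = "About two weeks now."

# Same assembly logic as format_message in app.py (memory_limit trimming omitted).
prompt = f"{SYSTEM_PROMPT}\n{ADD_RESPONSE(history[0][0], history[0][1])}"
for msg, ans in history[1:]:
    prompt += f"\n{ADD_RESPONSE(msg, ans)}"
prompt += f"\n{USER_PROMPT(message)}"
print(prompt)

Printing the prompt shows the system instructions followed by alternating <HUMAN>: / <ASSISTANT>: turns, ending with an open <ASSISTANT>: tag that the model completes.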