aifeifei798 committed on
Commit
8d8c753
1 Parent(s): 6f094dc

Update app.py

Files changed (1)
  1. app.py +28 -6
app.py CHANGED
@@ -8,6 +8,12 @@ from transformers import AutoProcessor, AutoModelForCausalLM
 import re
 from PIL import Image
 import io
+import json
+import logging
+
+# Set up logging
+logging.basicConfig(level=logging.DEBUG)
+logger = logging.getLogger(__name__)
 
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
@@ -22,6 +28,7 @@ llm = Llama(
     ),
     n_ctx=2048,
     n_gpu_layers=100, # change n_gpu_layers if you have more or less VRAM
+    chat_format="llama-3",
 )
 
 
@@ -63,12 +70,23 @@ def generate_text(
         yield picoutput
     else:
         temp = ""
-        system_prompt = f'{system_message}'
+        # Create system_prompt as a dictionary
+        system_prompt = {"role": "system", "content": system_message}
+
+        # Create history_prompt as a list of dictionaries
+        history_prompt = []
         for interaction in history:
-            history_prompt += str(interaction[0]) + str(interaction[1])
-        input_prompt = system_prompt + " " + history_prompt + " " + str(in_text)
-        print(input_prompt)
-        output = llm(
+            user_part = {"role": "user", "content": str(interaction[0])}
+            assistant_part = {"role": "assistant", "content": str(interaction[1])}
+            history_prompt.extend([user_part, assistant_part])
+
+        # Create user_input_part as a dictionary
+        user_input_part = {"role": "user", "content": str(in_text)}
+
+        # Construct input_prompt as a list of dictionaries
+        input_prompt = [system_prompt] + history_prompt + [user_input_part]
+        logger.debug(f"Input Prompt: {input_prompt}")
+        output = llm.create_chat_completion(
             input_prompt,
             temperature=temperature,
             top_p=top_p,
@@ -84,12 +102,16 @@ def generate_text(
                 "SYSTEM:",
                 "<|start_header_id|>",
                 "<|eot_id|>",
+                "<im_end>",
+                "<|im_end|>",
             ],
             stream=True,
         )
         for out in output:
             stream = copy.deepcopy(out)
-            temp += stream["choices"][0]["text"]
+            logger.debug(f"Stream: {stream}")
+            if 'delta' in stream['choices'][0] and 'content' in stream['choices'][0]['delta']:
+                temp += stream["choices"][0]["delta"]["content"]
             yield temp
 
 
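For reference, a minimal, self-contained sketch of the chat-completion pattern this commit switches to, written against llama-cpp-python's OpenAI-style streaming API. The model path, history contents, and sampling values below are placeholders for illustration only; the actual app downloads its GGUF file with hf_hub_download and takes these values from the Gradio UI.

from llama_cpp import Llama

# Placeholder path; not the file used by this Space.
llm = Llama(
    model_path="model.gguf",
    n_ctx=2048,
    n_gpu_layers=100,
    chat_format="llama-3",  # tells llama-cpp-python how to template the messages
)

# Gradio-style history: a list of (user, assistant) tuples.
history = [("Hi", "Hello! How can I help?")]
system_message = "You are a helpful assistant."
in_text = "Tell me a joke."

# Build the OpenAI-style message list, as the updated app.py does.
messages = [{"role": "system", "content": system_message}]
for user_msg, assistant_msg in history:
    messages.append({"role": "user", "content": str(user_msg)})
    messages.append({"role": "assistant", "content": str(assistant_msg)})
messages.append({"role": "user", "content": str(in_text)})

# Stream the reply; each chunk carries a partial "delta".
reply = ""
for chunk in llm.create_chat_completion(messages, temperature=0.7, top_p=0.95, stream=True):
    delta = chunk["choices"][0].get("delta", {})
    if "content" in delta:
        reply += delta["content"]
print(reply)

The "if 'delta' in ... and 'content' in ..." guard in the diff plays the same role as the .get() calls here: the first streamed chunk typically carries only the assistant role and the last only a finish reason, so indexing delta["content"] unconditionally would raise a KeyError.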