Spaces:
Running
Running
freQuensy23
committed on
Commit
•
dfa8941
1
Parent(s):
cc03544
Fix add logs
Browse files
- app.py +2 -2
- generators.py +2 -2
app.py
CHANGED
@@ -21,8 +21,8 @@ async def handle(system_input: str, user_input: str):
|
|
21 |
buffers[i] += str(outputs[i])
|
22 |
|
23 |
yield list(buffers) + ["", ""]
|
24 |
-
yield list(buffers) + [generate_openllama(system_input, user_input),
|
25 |
-
|
26 |
|
27 |
|
28 |
with gr.Blocks() as demo:
|
|
|
21 |
buffers[i] += str(outputs[i])
|
22 |
|
23 |
yield list(buffers) + ["", ""]
|
24 |
+
yield list(buffers) + [(openllama_generation := generate_openllama(system_input, user_input)), '']
|
25 |
+
yield list(buffers) + [openllama_generation, generate_bloom(system_input, user_input)]
|
26 |
|
27 |
|
28 |
with gr.Blocks() as demo:
|
generators.py
CHANGED
@@ -42,7 +42,7 @@ async def generate_gpt2(system_input, user_input):
|
|
42 |
output = await query_llm({
|
43 |
"inputs": (inputs:=f"{system_input}\n{user_input}"),
|
44 |
}, "openai-community/gpt2")
|
45 |
-
yield output[0]["generated_text"]
|
46 |
|
47 |
|
48 |
async def generate_llama2(system_input, user_input):
|
@@ -67,7 +67,7 @@ def generate_openllama(system_input, user_input):
|
|
67 |
model = LlamaForCausalLM.from_pretrained(
|
68 |
model_path, torch_dtype=torch.float16, device_map='cuda',
|
69 |
)
|
70 |
-
|
71 |
input_text = f"{system_input}\n{user_input}"
|
72 |
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
73 |
output = model.generate(input_ids, max_length=128)
|
|
|
42 |
output = await query_llm({
|
43 |
"inputs": (inputs:=f"{system_input}\n{user_input}"),
|
44 |
}, "openai-community/gpt2")
|
45 |
+
yield output[0]["generated_text"]
|
46 |
|
47 |
|
48 |
async def generate_llama2(system_input, user_input):
|
|
|
67 |
model = LlamaForCausalLM.from_pretrained(
|
68 |
model_path, torch_dtype=torch.float16, device_map='cuda',
|
69 |
)
|
70 |
+
print('model openllama loaded')
|
71 |
input_text = f"{system_input}\n{user_input}"
|
72 |
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")
|
73 |
output = model.generate(input_ids, max_length=128)
|