Spaces:
Running
on
Zero
Running
on
Zero
Kohaku-Blueleaf
committed on
Commit
•
0d5be9b
1
Parent(s):
89f225d
flash attn to prevent oom
Browse files
- app.py +7 -1
- kgen/generate.py +3 -1
- requirements.txt +2 -1
app.py
CHANGED
@@ -113,7 +113,13 @@ masterpiece, newest, absurdres, {rating}"""
|
|
113 |
if __name__ == "__main__":
|
114 |
models = {
|
115 |
model_path: [
|
116 |
-
LlamaForCausalLM.from_pretrained(
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
LlamaTokenizer.from_pretrained(model_path),
|
118 |
]
|
119 |
for model_path in MODEL_PATHS
|
|
|
113 |
if __name__ == "__main__":
|
114 |
models = {
|
115 |
model_path: [
|
116 |
+
LlamaForCausalLM.from_pretrained(
|
117 |
+
model_path, attn_implementation="flash_attention_2"
|
118 |
+
)
|
119 |
+
.requires_grad_(False)
|
120 |
+
.eval()
|
121 |
+
.half()
|
122 |
+
.to(DEVICE),
|
123 |
LlamaTokenizer.from_pretrained(model_path),
|
124 |
]
|
125 |
for model_path in MODEL_PATHS
|
kgen/generate.py
CHANGED
@@ -83,7 +83,9 @@ def tag_gen(
|
|
83 |
repetition_penalty=None,
|
84 |
max_new_tokens=max_new_tokens,
|
85 |
stream_output=False,
|
86 |
-
autocast_gen=
|
|
|
|
|
87 |
prompt_lookup_num_tokens=10,
|
88 |
pad_token_id=tokenizer.eos_token_id,
|
89 |
eos_token_id=tokenizer.eos_token_id,
|
|
|
83 |
repetition_penalty=None,
|
84 |
max_new_tokens=max_new_tokens,
|
85 |
stream_output=False,
|
86 |
+
autocast_gen=lambda: (
|
87 |
+
torch.autocast("cuda") if torch.cuda.is_available() else nullcontext()
|
88 |
+
),
|
89 |
prompt_lookup_num_tokens=10,
|
90 |
pad_token_id=tokenizer.eos_token_id,
|
91 |
eos_token_id=tokenizer.eos_token_id,
|
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ llama-cpp-python
|
|
4 |
gradio
|
5 |
requests
|
6 |
sentencepiece
|
7 |
-
spaces
|
|
|
|
4 |
gradio
|
5 |
requests
|
6 |
sentencepiece
|
7 |
+
spaces
|
8 |
+
flash-attn
|