Spaces:

harry85
/

QWEN-Chat-QA-0.5B

Sleeping

harry85 committed on Jul 4

Commit

44a3a0e

•

1 Parent(s): 6323488

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # Install the necessary packages
-# pip install accelerate transformers fastapi pydantic torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
@@ -9,14 +9,15 @@ from fastapi import FastAPI
 # Initialize the FastAPI app
 app = FastAPI(docs_url="/")
-# Load the model and tokenizer once at startup
-device = "cuda" # the device to load the model onto
 model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen1.5-0.5B-Chat",
     torch_dtype="auto",
     device_map="auto"
-)
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
 # Define the request model

 # Install the necessary packages
+# pip install accelerate transformers fastapi pydantic torch jinja2
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 # Initialize the FastAPI app
 app = FastAPI(docs_url="/")
+# Determine the device to use
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the model and tokenizer once at startup
 model = AutoModelForCausalLM.from_pretrained(
     "Qwen/Qwen1.5-0.5B-Chat",
     torch_dtype="auto",
     device_map="auto"
+).to(device)
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B-Chat")
 # Define the request model