Spaces:
Running
on
Zero
Running
on
Zero
temporarily disable cuda when loading lora
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ from copy import deepcopy
|
|
11 |
import numpy as np
|
12 |
import re
|
13 |
from bs4 import BeautifulSoup
|
|
|
14 |
|
15 |
|
16 |
import logging
|
@@ -37,15 +38,38 @@ max_new_tokens = 5000
|
|
37 |
obj_context_feature_type = "text"
|
38 |
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
model.to("cuda") # Huggingface Zero-GPU requires explicit device placement
|
50 |
|
51 |
def get_chatbot_response(user_chat_input, scene_id):
|
|
|
11 |
import numpy as np
|
12 |
import re
|
13 |
from bs4 import BeautifulSoup
|
14 |
+
import torch
|
15 |
|
16 |
|
17 |
import logging
|
|
|
38 |
obj_context_feature_type = "text"
|
39 |
|
40 |
|
41 |
+
class TemporaryDisableCUDA:
|
42 |
+
def __enter__(self):
|
43 |
+
# Store the current CUDA_VISIBLE_DEVICES environment variable
|
44 |
+
self.original_cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
|
45 |
+
# Disable CUDA by setting CUDA_VISIBLE_DEVICES to an invalid value
|
46 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
47 |
+
# Reinitialize CUDA state in PyTorch
|
48 |
+
torch.cuda.device_count()
|
49 |
+
|
50 |
+
def __exit__(self, exc_type, exc_value, traceback):
|
51 |
+
# Restore the original CUDA_VISIBLE_DEVICES environment variable
|
52 |
+
if self.original_cuda_visible_devices is not None:
|
53 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = self.original_cuda_visible_devices
|
54 |
+
else:
|
55 |
+
del os.environ["CUDA_VISIBLE_DEVICES"]
|
56 |
+
# Reinitialize CUDA state in PyTorch
|
57 |
+
torch.cuda.device_count()
|
58 |
+
|
59 |
+
|
60 |
+
# HACK: we need to temporarily disable CUDA b/c when loading model,
|
61 |
+
# PEFT somehow always uses CUDA even if we set device_map to 'cpu'
|
62 |
+
with TemporaryDisableCUDA():
|
63 |
+
tokenizer, model, data_loader = load_model_and_dataloader(
|
64 |
+
model_path=model_path,
|
65 |
+
model_base=model_base,
|
66 |
+
load_8bit=load_8bit,
|
67 |
+
load_4bit=load_4bit,
|
68 |
+
load_bf16=load_bf16,
|
69 |
+
scene_to_obj_mapping=scene_to_obj_mapping,
|
70 |
+
device_map='cpu',
|
71 |
+
) # Huggingface Zero-GPU has to use .to(device) to set the device, otherwise it will fail
|
72 |
+
|
73 |
model.to("cuda") # Huggingface Zero-GPU requires explicit device placement
|
74 |
|
75 |
def get_chatbot_response(user_chat_input, scene_id):
|