jedyang97 commited on
Commit
b2bb484
1 Parent(s): d419c1d

temporarily disable cuda when loading lora

Browse files
Files changed (1) hide show
  1. app.py +33 -9
app.py CHANGED
@@ -11,6 +11,7 @@ from copy import deepcopy
11
  import numpy as np
12
  import re
13
  from bs4 import BeautifulSoup
 
14
 
15
 
16
  import logging
@@ -37,15 +38,38 @@ max_new_tokens = 5000
37
  obj_context_feature_type = "text"
38
 
39
 
40
- tokenizer, model, data_loader = load_model_and_dataloader(
41
- model_path=model_path,
42
- model_base=model_base,
43
- load_8bit=load_8bit,
44
- load_4bit=load_4bit,
45
- load_bf16=load_bf16,
46
- scene_to_obj_mapping=scene_to_obj_mapping,
47
- device_map='cpu',
48
- ) # Huggingface Zero-GPU has to use .to(device) to set the device, otherwise it will fail
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  model.to("cuda") # Huggingface Zero-GPU requires explicit device placement
50
 
51
  def get_chatbot_response(user_chat_input, scene_id):
 
11
  import numpy as np
12
  import re
13
  from bs4 import BeautifulSoup
14
+ import torch
15
 
16
 
17
  import logging
 
38
  obj_context_feature_type = "text"
39
 
40
 
41
+ class TemporaryDisableCUDA:
42
+ def __enter__(self):
43
+ # Store the current CUDA_VISIBLE_DEVICES environment variable
44
+ self.original_cuda_visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES")
45
+ # Disable CUDA by setting CUDA_VISIBLE_DEVICES to an invalid value
46
+ os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
47
+ # Reinitialize CUDA state in PyTorch
48
+ torch.cuda.device_count()
49
+
50
+ def __exit__(self, exc_type, exc_value, traceback):
51
+ # Restore the original CUDA_VISIBLE_DEVICES environment variable
52
+ if self.original_cuda_visible_devices is not None:
53
+ os.environ["CUDA_VISIBLE_DEVICES"] = self.original_cuda_visible_devices
54
+ else:
55
+ del os.environ["CUDA_VISIBLE_DEVICES"]
56
+ # Reinitialize CUDA state in PyTorch
57
+ torch.cuda.device_count()
58
+
59
+
60
+ # HACK: we need to temporarily disable CUDA b/c when loading model,
61
+ # PEFT somehow always uses CUDA even if we set device_map to 'cpu'
62
+ with TemporaryDisableCUDA():
63
+ tokenizer, model, data_loader = load_model_and_dataloader(
64
+ model_path=model_path,
65
+ model_base=model_base,
66
+ load_8bit=load_8bit,
67
+ load_4bit=load_4bit,
68
+ load_bf16=load_bf16,
69
+ scene_to_obj_mapping=scene_to_obj_mapping,
70
+ device_map='cpu',
71
+ ) # Huggingface Zero-GPU has to use .to(device) to set the device, otherwise it will fail
72
+
73
  model.to("cuda") # Huggingface Zero-GPU requires explicit device placement
74
 
75
  def get_chatbot_response(user_chat_input, scene_id):