kwabs22 committed
Commit: acfdbf2
Parent: e95ad42

Testing Suggested Code Fix

Files changed (1): app.py (+5, −38)
app.py CHANGED
@@ -1,36 +1,3 @@
- # import gradio as gr
- # from transformers import AutoTokenizer, AutoModelForCausalLM
- # import torch
- # import spaces
-
- # tokenizer = None
- # model = None
-
- # def loadmodel():
- #     tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
- #     model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16", torch_dtype='auto', device_map='auto')
- #     return tokenizer, model
-
- # @spaces.GPU
- # def generate_text(prompt):
- #     global tokenizer, model
- #     if tokenizer is None or model is None:
- #         tokenizer, model = loadmodel()
-
- #     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
- #     outputs = model.generate(inputs.input_ids, max_length=100)
- #     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-
- # interface = gr.Interface(
- #     fn=generate_text,
- #     inputs="text",
- #     outputs="text",
- #     title="Meta-Llama-3.1-70B Text Generation",
- #     description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
- # )
-
- # interface.launch()
-
  import spaces
  import gradio as gr
  from transformers import AutoTokenizer, AutoModelForCausalLM
@@ -38,7 +5,7 @@ import torch
  import subprocess
  import os
 
- def install_cuda_toolkit():
+ def install_cuda_toolkit():  # Swiftly provided by https://huggingface.co/John6666 to fix "OSError: CUDA_HOME environment variable is not set. Please set it to your CUDA install root."
      # CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
      CUDA_TOOLKIT_URL = "https://developer.download.nvidia.com/compute/cuda/12.2.0/local_installers/cuda_12.2.0_535.54.03_linux.run"
      CUDA_TOOLKIT_FILE = "/tmp/%s" % os.path.basename(CUDA_TOOLKIT_URL)
@@ -55,7 +22,7 @@ def install_cuda_toolkit():
      # Fix: arch_list[-1] += '+PTX'; IndexError: list index out of range
      os.environ["TORCH_CUDA_ARCH_LIST"] = "8.0;8.6"
 
- install_cuda_toolkit()
+ install_cuda_toolkit()  # Swiftly provided by https://huggingface.co/John6666 to fix "OSError: CUDA_HOME environment variable is not set. Please set it to your CUDA install root."
 
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -65,15 +32,15 @@ model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQL
  @spaces.GPU
  def generate_text(prompt):
      inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-     outputs = model.generate(inputs.input_ids, max_length=100)
+     outputs = model.generate(inputs.input_ids)  # , max_length=100)
      return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
  interface = gr.Interface(
      fn=generate_text,
      inputs="text",
      outputs="text",
-     title="Meta-Llama-3.1-70B Text Generation",
-     description="Enter a prompt and generate text using Meta-Llama-3.1-70B.",
+     title="Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16 Text Generation",
+     description="Enter a prompt and generate text using Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16. Responses are a little different from Meta-Llama-3.1-70B.",
  )
 
  interface.launch()
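
Note: the hunk context hides most of install_cuda_toolkit's body, so the diff alone does not show how CUDA_HOME actually gets set. As a reference, here is a minimal sketch of this style of runfile installer, assuming NVIDIA's standard silent-install flags and the default /usr/local/cuda prefix; it is a reconstruction, not the exact code in this Space.

import os
import subprocess

def install_cuda_toolkit():
    # Hypothetical reconstruction of the elided body (assumed, not verbatim).
    url = ("https://developer.download.nvidia.com/compute/cuda/12.2.0/"
           "local_installers/cuda_12.2.0_535.54.03_linux.run")
    runfile = "/tmp/%s" % os.path.basename(url)
    subprocess.call(["wget", "-q", url, "-O", runfile])   # download the runfile
    subprocess.call(["chmod", "+x", runfile])             # make it executable
    subprocess.call([runfile, "--silent", "--toolkit"])   # toolkit only, no driver
    # Export CUDA_HOME so anything that compiles CUDA extensions can find nvcc.
    os.environ["CUDA_HOME"] = "/usr/local/cuda"
    os.environ["PATH"] = "%s/bin:%s" % (os.environ["CUDA_HOME"], os.environ["PATH"])
    os.environ["LD_LIBRARY_PATH"] = "%s/lib:%s" % (
        os.environ["CUDA_HOME"],
        os.environ.get("LD_LIBRARY_PATH", ""),
    )

The TORCH_CUDA_ARCH_LIST = "8.0;8.6" line that survives as context pins the compute architectures (Ampere-class A100/A10G, the usual ZeroGPU hardware) so torch's extension builder does not have to probe for a visible GPU at build time, which is the arch_list[-1] IndexError the inline comment refers to.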
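
A side effect of the generate change: with max_length=100 commented out and no other cap passed, generation runs until an EOS token or the model's default generation_config limit, which can be slow for a 70B model even at 2 bits. If the intent was only to keep the prompt from consuming the budget (max_length counts prompt plus completion), a hedged alternative is max_new_tokens, a standard transformers generate argument that caps just the completion:

# Assumed alternative, not what this commit does: bound only the generated
# tokens instead of removing the cap entirely.
outputs = model.generate(inputs.input_ids, max_new_tokens=100)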
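
The tokenizer/model load itself appears only as truncated hunk context (model = AutoModelForCausalLM.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQL...). Based on the commented-out original at the top of the diff, it presumably looks like this sketch:

# Assumed load block, mirroring the commented-out original above.
tokenizer = AutoTokenizer.from_pretrained("ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16")
model = AutoModelForCausalLM.from_pretrained(
    "ISTA-DASLab/Meta-Llama-3.1-70B-AQLM-PV-2Bit-1x16",
    torch_dtype="auto",  # take the dtype from the checkpoint
    device_map="auto",   # automatic placement; requires the accelerate package
)

The CUDA_HOME error this commit works around most likely comes from the AQLM kernels this checkpoint depends on (the aqlm package), which JIT-compile CUDA extensions through torch and therefore need a toolkit present on the machine.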