kiddobellamy committed on
Commit 6cf0252
1 Parent(s): af217f3

Update handler.py

Files changed (1)
  handler.py +24 −13
handler.py CHANGED
@@ -1,5 +1,5 @@
 import torch
-from transformers import LlamaForCausalLM, AutoProcessor
+from transformers import LlamaForCausalLM, AutoTokenizer, AutoProcessor
 from PIL import Image
 import base64
 import io
@@ -7,11 +7,17 @@ import io
 # Load model and processor globally
 model_id = "kiddobellamy/Llama_Vision"
 
+# Load the model
 model = LlamaForCausalLM.from_pretrained(
     model_id,
-    torch_dtype=torch.bfloat16,
+    torch_dtype=torch.float16,  # Use torch.float16 if bfloat16 is not supported
     device_map="auto",
 )
+
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# Load the processor if needed (for image processing)
 processor = AutoProcessor.from_pretrained(model_id)
 
 def handler(event, context):
@@ -28,24 +34,29 @@ def handler(event, context):
         image_bytes = base64.b64decode(image_base64)
         image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
 
-        # Prepare the message
-        messages = [
-            {"role": "user", "content": [
-                {"type": "image"},
-                {"type": "text", "text": prompt}
-            ]}
-        ]
-        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+        # Process image if necessary (depends on your model)
+        # Assuming your processor handles image preprocessing
+        image_inputs = processor(images=image, return_tensors="pt").to(model.device)
+
+        # Tokenize the prompt
+        text_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-        # Process inputs
-        inputs = processor(image, input_text, return_tensors="pt").to(model.device)
+        # Combine image and text inputs if required by your model
+        # This step depends on how your model processes images and text together
+        inputs = {
+            'input_ids': text_inputs['input_ids'],
+            'attention_mask': text_inputs['attention_mask'],
+            # Include image inputs as required
+            # 'pixel_values': image_inputs['pixel_values'],
+        }
 
         # Generate output
         output_ids = model.generate(**inputs, max_new_tokens=50)
-        generated_text = processor.decode(output_ids[0], skip_special_tokens=True)
+        generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
         # Return the result
         return {'generated_text': generated_text}
 
     except Exception as e:
         return {'error': str(e)}
+#111
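Worth noting: in the version committed here, the inputs dict handed to model.generate() contains only input_ids and attention_mask (the 'pixel_values' entry is commented out), so the decoded image never reaches the model; LlamaForCausalLM is also a text-only class and cannot consume pixel values. Below is a minimal sketch of a handler that keeps the image wired through generation, assuming kiddobellamy/Llama_Vision is a Llama 3.2 Vision (Mllama) checkpoint and that the event carries prompt and image_base64 fields, as the names in the diff suggest:

import base64
import io

import torch
from PIL import Image
from transformers import AutoProcessor, MllamaForConditionalGeneration

model_id = "kiddobellamy/Llama_Vision"

# MllamaForConditionalGeneration is the multimodal Llama 3.2 class in
# transformers >= 4.45; it accepts pixel_values alongside input_ids.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.float16,  # or torch.bfloat16 where the hardware supports it
    device_map="auto",
)
processor = AutoProcessor.from_pretrained(model_id)

def handler(event, context):
    try:
        # Assumed event shape: {"prompt": str, "image_base64": str}
        prompt = event["prompt"]
        image_bytes = base64.b64decode(event["image_base64"])
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

        # The chat template inserts the image placeholder token; the processor
        # then returns input_ids, attention_mask, and pixel_values together,
        # so the image actually reaches model.generate().
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": prompt},
            ]}
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
        inputs = processor(image, input_text, return_tensors="pt").to(model.device)

        output_ids = model.generate(**inputs, max_new_tokens=50)
        generated_text = processor.decode(output_ids[0], skip_special_tokens=True)
        return {"generated_text": generated_text}
    except Exception as e:
        return {"error": str(e)}

This essentially restores the processor-driven path the commit removed: apply_chat_template plus a single processor call is the documented usage pattern for Mllama checkpoints, and it avoids hand-assembling the inputs dict where the image tensor can silently be dropped.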