pbevan11 committed
Commit 383292f
Parent: 2d9dd76

Upload 2 files

Files changed (2)
  1. cog.yaml +12 -0
  2. predict.py +35 -0
cog.yaml ADDED
@@ -0,0 +1,12 @@
+build:
+  gpu: true
+  python_version: "3.10"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+  python_packages:
+    - "torch==2.0.1"
+    - "transformers==4.30.2"
+    - "peft==0.4.0"
+
+predict: "predict.py:Predictor"
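
With this config, `cog build` produces a GPU image and `cog predict -i instruction="..." -i inp="..."` runs the Predictor declared under predict:. As a rough sketch of calling the built image over Cog's HTTP prediction endpoint instead (assuming the container is running locally and published on port 5000; the field names instruction and inp come from predict.py below):

    import requests  # hypothetical client script, not part of this commit

    resp = requests.post(
        "http://localhost:5000/predictions",
        json={"input": {
            "instruction": "Correct the OCR errors in the text below.",
            "inp": "Th1s 1s s0me n0isy OCR t3xt.",
        }},
    )
    resp.raise_for_status()
    print(resp.json()["output"])  # the corrected text returned by predict()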
predict.py ADDED
@@ -0,0 +1,35 @@
+import torch
+# AutoModelForCausalLM is provided by transformers, not peft
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from cog import BasePredictor, Input
+
+class Predictor(BasePredictor):
+    def setup(self):
+        model_id = 'pbevan11/llama-3-8b-ocr-correction'
+        self.model = AutoModelForCausalLM.from_pretrained(model_id, load_in_8bit=True)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+
+    def predict(self, instruction: str = Input(description="Instruction for the model"),
+                inp: str = Input(description="Input text to correct")) -> str:
+        prompt = self.create_prompt(instruction, inp)
+        input_ids = self.tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
+        out_ids = self.model.generate(input_ids=input_ids, max_new_tokens=5000, do_sample=False)
+        full_output = self.tokenizer.batch_decode(out_ids.detach().cpu().numpy(), skip_special_tokens=True)[0]
+        response_start = full_output.find("### Response:")
+        if response_start != -1:
+            return full_output[response_start + len("### Response:"):]
+        else:
+            return full_output[len(prompt):]
+
+    def create_prompt(self, instruction, inp):
+        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{instruction}
+
+### Input:
+{inp}
+
+### Response:
+"""