iqbalamo93 committed
Commit af9a42b
1 Parent(s): c2eecdb

Update README.md

Files changed (1)
  1. README.md +40 -2
README.md CHANGED
@@ -77,6 +77,44 @@ pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
  print(pipe(prompt)[0]["generated_text"])
  ```

- #### Method 2: Merging with base mode explicitly
- todo
+ #### Method 3: Using PeftModel
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+ from peft import PeftModel
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,                 # Use 4-bit precision model loading
+     bnb_4bit_quant_type="nf4",         # Quantization type
+     bnb_4bit_compute_dtype="float16",  # Compute dtype
+     bnb_4bit_use_double_quant=True,    # Apply nested quantization
+ )
+
+ model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+ adapter_name = "iqbalamo93/TinyLlama-1.1B-intermediate-1431k-3T-adapters-ultrachat"
+
+ # Load the 4-bit quantized base model, then attach the LoRA adapters on top of it
+ model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)
+ model = PeftModel.from_pretrained(model, adapter_name)
+
+ tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+
+ prompt = """<|user|>
+ Tell me something about Large Language Models.</s>
+ <|assistant|>
+ """
+
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+ with torch.no_grad():
+     outputs = model.generate(
+         input_ids=inputs["input_ids"],
+         attention_mask=inputs["attention_mask"],
+         max_new_tokens=256,      # Cap on newly generated tokens
+         do_sample=True,          # Enable sampling so the settings below take effect
+         temperature=0.7,         # Controls randomness: lower = more deterministic
+         top_p=0.9,               # Nucleus sampling
+         top_k=50,                # Top-K sampling
+         num_return_sequences=1,
+     )
+
+ for i, output in enumerate(outputs):
+     generated_text = tokenizer.decode(output, skip_special_tokens=True)
+     print(f"--- Generated Sequence {i + 1} ---")
+     print(generated_text)
+ ```
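+
+ If you would rather fold the adapters into the base weights (the "merging explicitly" route), a minimal sketch using peft's `merge_and_unload()` might look like the following. It reuses `model_name` and `adapter_name` from above, loads the base model in half precision instead of 4-bit since merging into quantized weights is awkward, and the output directory name is only a placeholder.
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM
+ from peft import PeftModel
+
+ # Load the base model in fp16 and attach the LoRA adapters
+ base = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
+ merged = PeftModel.from_pretrained(base, adapter_name).merge_and_unload()
+
+ # Persist the merged, standalone model (placeholder directory name)
+ merged.save_pretrained("tinyllama-ultrachat-merged")
+ ```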