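# Generate candidate responses with the merged LoRA model on the test split of
# gardner/glaive-function-calling-v2-sharegpt and push prompt/response pairs to
# Argilla for preference annotation.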
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import argilla as rg
import os
from tqdm import tqdm

dir_path = os.path.dirname(os.path.realpath(__file__))


rg.init(
    api_url="<argilla-api-url>",
    api_key="<argilla-api-key>"
)

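# Preference-modelling feedback dataset: each record holds a prompt and two candidate responses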
ds = rg.FeedbackDataset.for_preference_modeling(
    number_of_responses=2,
    context=False,
    use_markdown=False,
    guidelines=None,
    metadata_properties=None,
    vectors_settings=None,
)

model_dir = os.path.join(dir_path, "lora-out", "merged")

tokenizer = AutoTokenizer.from_pretrained(model_dir)

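# Test split of the ShareGPT-formatted Glaive function-calling dataset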
dataset = load_dataset("gardner/glaive-function-calling-v2-sharegpt", split="test")

def preprocess_function_calling(examples):
    """Convert ShareGPT-style conversations into chat-templated prompts and reference answers."""
    texts = []
    answers = []
    for chat in examples["conversations"]:
        for turn in chat:
            # Rename ShareGPT keys ("from"/"value") to chat-template keys ("role"/"content")
            turn['role'] = turn.pop('from')
            turn['content'] = turn.pop('value')

            if turn['role'] == 'human':
                turn['role'] = 'user'
            if turn['role'] == 'gpt':
                turn['role'] = 'assistant'

        if chat[-1]['role'] == 'assistant':
            # Keep the final assistant turn as the reference answer and drop it from the prompt
            answers.append(chat[-1]['content'])
            del chat[-1]
        else:
            # Keep the output columns the same length even if a conversation has no final assistant turn
            answers.append("")

        texts.append(tokenizer.apply_chat_template(chat, tokenize=False))

    return { "texts": texts, "answer": answers }

texts = dataset.map(preprocess_function_calling, batched=True)

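# Load the merged (base + LoRA) model for GPU inference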
model = AutoModelForCausalLM.from_pretrained(model_dir).to("cuda")

records = []

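# For each prompt, generate one model response and pair it with the dataset's reference answer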
for text in tqdm(texts):
    # Append the ChatML assistant header so the model starts a new assistant turn
    prompt = text['texts'] + "<|im_start|>assistant\n"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # temperature only takes effect with sampling enabled, so pass it to generate()
    outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.9)
    # Decode only the newly generated tokens so the prompt is not echoed into the response
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True)

    response1 = response.replace("<|im_end|>\n", "").strip()
    response2 = text['answer']
    print(response1)

    records.append(rg.FeedbackRecord(fields={
        "prompt": prompt,
        "response1": response1,
        "response2": response2,
    }))

    # text['response'] = response

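# Upload the records and publish the dataset to the Argilla workspace for labelling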
ds.add_records(records)
ds.push_to_argilla(name="function-calling", workspace="argilla")