File size: 12,171 Bytes
1639c46
 
bac027d
014ba5e
c03435f
9540a56
a1b571e
f39aadc
4fa4c7b
1639c46
dd9842f
4fa4c7b
 
1847ea5
4fa4c7b
014ba5e
dd9842f
 
4fa4c7b
 
 
a1b571e
4fa4c7b
d4e0b1a
 
 
 
 
 
fd156db
4fa4c7b
 
 
 
 
 
 
 
 
 
 
59904b9
451712e
 
 
4fa4c7b
a1b571e
4fa4c7b
cc225d4
4fa4c7b
 
 
 
 
 
 
 
1847ea5
cc225d4
 
fd156db
4fa4c7b
 
 
 
 
 
 
 
 
 
 
409cbac
451712e
409cbac
4fa4c7b
 
 
cc225d4
4fa4c7b
 
 
 
 
 
 
 
6934609
cc225d4
 
4fa4c7b
 
 
 
 
 
 
 
 
 
 
6f92fa3
de62f09
a1b571e
01af800
 
a1b571e
cce3c9c
01af800
a1b571e
cce3c9c
 
65408a7
 
 
cce3c9c
d4e0b1a
 
 
 
cce3c9c
df22c76
 
4fa4c7b
4bea324
 
3bef8f9
8586e70
df22c76
 
 
 
4fa4c7b
9813584
4fa4c7b
 
 
 
9813584
4fa4c7b
 
 
5d15a5a
d629b6c
df22c76
 
4fa4c7b
 
 
 
 
 
 
 
 
 
cce3c9c
c03435f
 
d8ef990
 
 
 
c03435f
 
 
 
 
 
 
 
 
 
 
 
0b24b94
d8ef990
 
 
0b24b94
d8ef990
 
 
0b24b94
d8ef990
 
 
0b24b94
d8ef990
 
c03435f
a1b571e
 
de62f09
9e36f2b
0b24b94
a1b571e
0b24b94
 
de62f09
 
 
0b24b94
 
 
9e36f2b
 
f571a5a
9e36f2b
0b24b94
d2b0630
ac01403
0b24b94
ac01403
0b24b94
ac01403
0b24b94
3057c0d
0b24b94
3057c0d
 
 
 
 
d4e0b1a
 
 
 
 
 
2e30d3d
 
 
00b9214
d4e0b1a
2e30d3d
 
80b9350
d4e0b1a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
de62f09
1639c46
 
d4e0b1a
dd9842f
 
0b24b94
 
c03435f
1639c46
0b24b94
de62f09
1639c46
0b24b94
de62f09
1639c46
 
0b24b94
dd9842f
1639c46
 
7be9d95
 
 
 
 
1639c46
 
4d068a9
dd9842f
 
 
 
 
de62f09
014ba5e
 
 
 
 
9e16c0f
 
014ba5e
 
 
 
4fa4c7b
dd9842f
1639c46
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
import gradio
import argparse
import os
import boto3
import pandas as pd
from copy import copy
from random import choice

import queue

from constants import css, js_code, js_light, BANNER
from utils import model_response, clear_chat
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama

# Loaded model objects keyed by model name; model_manager sets an evicted
# model's entry to None instead of deleting the key.
INIT_MODELS = {}
# boto3 S3 client; assigned in the __main__ section, None until then.
S3_SESSION = None
# Markdown texts read from header.md / about.md in the __main__ section.
HEADER_MD = None
ABOUT_MD = None
# Names of the models registered by model_manager. This is a LIFO queue, so
# the most recently registered name is the first one handed back for eviction.
CURRENT_MODELS = queue.LifoQueue()
# Model name -> zero-argument callable that model_manager invokes to load it.
MODEL_LIB = {
    "TINYLLAMA": get_tinyllama,
    "QWEN2INS1B": get_qwen2ins1b,
    "RUBASE": GigaChat.get_giga,
}
# Model name -> response function; passed through to utils.model_response.
GEN_LIB = {
    "TINYLLAMA": response_tinyllama,
    "QWEN2INS1B": response_qwen2ins1b,
    "RUBASE": response_gigachat,
}
# Model names offered in the UI dropdowns and checkbox groups.
MODEL_LIST = ["TINYLLAMA", "QWEN2INS1B", "RUBASE"]

# NOTE(review): this rebinds the `css` name imported from `constants` above,
# so the imported stylesheet is never used — confirm that is intended.
css = """
#small span{
 font-size: 0.7em;
}
"""

async def model_gen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
    """Generate a reply from ``model_name`` for a newly submitted prompt.

    Args:
        content: Prompt taken from the input box. ``None`` or an empty value
            means there is nothing to generate.
        chat_history: Conversation accumulated in the chatbot so far.
        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
        top_p: Forwarded to the model as ``"top_p"``.
        temp: Forwarded to the model as ``"temperature"``.
        max_tokens: Forwarded to the model as ``"max_tokens"``.
        no_context: When true, the model is given an empty history instead of
            ``chat_history``.

    Returns:
        ``('', [])`` for an empty prompt, otherwise whatever
        ``model_response`` returns (callers wire it to the
        ``[textbox, chatbot]`` outputs).
    """
    # Validate first: loading a model is expensive and may evict another one,
    # so it must not happen for a prompt that is rejected anyway.
    if not content:
        return '', []

    model_manager(model_name, MODEL_LIB, 3)

    history = [] if no_context else chat_history
    sampling = {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}

    return await model_response(
        content,
        history,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        sampling,
    )

async def model_regen(
        content,
        chat_history,
        model_name: str,
        top_p,
        temp,
        max_tokens,
        no_context=False
        ):
    """Regenerate the answer to the last user message in ``chat_history``.

    Args:
        content: Current textbox value. It is ignored; the prompt is taken
            from the last entry of ``chat_history`` instead.
        chat_history: Conversation as a sequence of entries whose first item
            is the user message (the code reads ``chat_history[-1][0]``).
        model_name: Key into ``MODEL_LIB`` / ``GEN_LIB``.
        top_p: Forwarded to the model as ``"top_p"``.
        temp: Forwarded to the model as ``"temperature"``.
        max_tokens: Forwarded to the model as ``"max_tokens"``.
        no_context: When true, the model is given an empty history; otherwise
            it gets every exchange before the one being regenerated.

    Returns:
        ``('', [])`` when there is no history to regenerate from, otherwise
        whatever ``model_response`` returns (callers wire it to the
        ``[textbox, chatbot]`` outputs).
    """
    # Covers both None and an empty list; indexing [-1] on the latter would
    # raise IndexError. Checked before the (expensive) model load.
    if not chat_history:
        return '', []

    model_manager(model_name, MODEL_LIB, 3)

    # The last exchange is the one being regenerated: reuse its user message
    # and drop the exchange itself from the context handed to the model.
    content = copy(chat_history[-1][0])
    history = [] if no_context else chat_history[:-1]
    sampling = {"top_p": top_p, "temperature": temp, "max_tokens": max_tokens}

    return await model_response(
        content,
        history,
        S3_SESSION,
        INIT_MODELS,
        GEN_LIB,
        model_name,
        sampling,
    )

def model_manager(
        add_model,
        model_lib,
        max_models=3
        ):
    """Make sure ``add_model`` is loaded, evicting other models if needed.

    Loaded models live in the module-level ``INIT_MODELS`` dict and their
    names are tracked in the module-level ``CURRENT_MODELS`` queue.

    Args:
        add_model: Name of the model to make available.
        model_lib: Mapping from model name to a zero-argument loader.
        max_models: Upper bound on the number of tracked models. Models are
            evicted until there is room for one more.

    Raises:
        KeyError: If ``add_model`` is not a key of ``model_lib``. Nothing is
            evicted in that case.

    NOTE(review): the eviction order is whatever ``CURRENT_MODELS.get()``
    yields. The module defines it as a ``LifoQueue``, i.e. the most recently
    registered model is evicted first — confirm that is the intended policy.
    """
    # Already loaded: do not reload it and do not register its name twice
    # (duplicate queue entries would let a model evict itself later).
    if INIT_MODELS.get(add_model) is not None:
        return

    # Resolve the loader before evicting so an unknown name fails cleanly.
    loader = model_lib[add_model]

    # The emptiness check keeps get() from blocking forever when
    # max_models <= 0.
    while not CURRENT_MODELS.empty() and CURRENT_MODELS.qsize() >= max_models:
        evicted = CURRENT_MODELS.get()
        INIT_MODELS[evicted] = None

    # Register the name only after the loader succeeded, so a failed load
    # leaves no stale queue entry behind.
    INIT_MODELS[add_model] = loader()
    CURRENT_MODELS.put(add_model)

def tab_online_arena():
    """Lay out the live arena tab: two model panels fed by one prompt box.

    Must be called inside an active Gradio layout context. Submitting the
    prompt runs ``model_gen`` once per side; the regenerate buttons run
    ``model_regen`` for their side; changing a model dropdown runs
    ``clear_chat`` for that side.
    """
    def _model_panel(label):
        # One column holding a model picker (random initial choice) and its chat.
        with gradio.Column():
            picker = gradio.Dropdown(
                MODEL_LIST,
                value=choice(MODEL_LIST),
                interactive=True,
                multiselect=False,
                label=label,
            )
            chat = gradio.Chatbot()
        return picker, chat

    with gradio.Row():
        model_left, chatbot_left = _model_panel("Left model")
        model_right, chatbot_right = _model_panel("Right model")

    with gradio.Row():
        msg = gradio.Textbox(label='Prompt', placeholder='Put your prompt here')

    # Voting buttons: created for the layout, not connected to any handler here.
    with gradio.Row():
        both_good = gradio.Button('Both Good')
        left_better = gradio.Button('Left Better')
        right_better = gradio.Button('Right Better')
        both_bad = gradio.Button('Both Bad')

    with gradio.Row():
        with gradio.Accordion("Parameters", open=False):
            no_context = gradio.Checkbox(label="No context", value=False)
            top_p = gradio.Slider(label='Top P', minimum=0, maximum=1, value=1, step=0.05, interactive=True)
            temp = gradio.Slider(label='Temperature', minimum=0, maximum=1, value=0.7, step=0.05, interactive=True)
            # NOTE(review): the label text below reads 'ouput' — likely a typo for 'output'.
            max_tokens = gradio.Slider(label='Max ouput tokens', minimum=1, maximum=2048, value=256, step=1, interactive=True)

    def _inputs_for(picker, chat):
        # Argument order expected by model_gen / model_regen.
        return [msg, chat, picker, top_p, temp, max_tokens, no_context]

    with gradio.Row():
        clear = gradio.ClearButton([msg, chatbot_left, chatbot_right], value='Clear history')
        regen_left = gradio.Button(value='Regenerate left answer')
        regen_right = gradio.Button(value='Regenerate right answer')
        regen_left.click(model_regen, _inputs_for(model_left, chatbot_left), [msg, chatbot_left])
        regen_right.click(model_regen, _inputs_for(model_right, chatbot_right), [msg, chatbot_right])

    with gradio.Blocks():
        # Switching a model runs clear_chat for that side's prompt/chat outputs.
        model_left.change(clear_chat, [], [msg, chatbot_left])
        model_right.change(clear_chat, [], [msg, chatbot_right])
        # One submit listener per side, so both models answer the same prompt.
        msg.submit(model_gen, _inputs_for(model_left, chatbot_left), [msg, chatbot_left])
        msg.submit(model_gen, _inputs_for(model_right, chatbot_right), [msg, chatbot_right])

def tab_leaderboard():
    """Render the leaderboard sub-tabs, each showing the same placeholder table.

    Must be called inside an active ``gradio.Tabs`` context.
    """
    scores = pd.DataFrame({
        "Model": ['A', 'B', 'C'],
        "Test 1": [0, 1, 0],
        "Test 2": [1, 0, 1],
    })

    def highlight_cols(x):
        """Return a copy of ``x`` whose 'Model' column holds 'color: green'."""
        styled = x.copy()
        styled[['Model']] = 'color: green'
        return styled

    # highlight_cols is not applied anywhere yet; the raw frame is displayed.
    # Intended hook: scores.style.apply(highlight_cols, axis=None)

    tab_titles = ("Autogen Metrics", "Autometrics", "SBS metrics", "Arena ELO rating")
    for tab_id, title in enumerate(tab_titles):
        with gradio.TabItem(title, id=tab_id):
            with gradio.Blocks():
                gradio.DataFrame(scores)

def tab_offline_arena():
    """Lay out the side-by-side evaluation tab, filled with placeholder data.

    Top row: three filter accordions (models, task types, per-task criteria),
    each with a "Clear" button that empties its checkbox group(s). Below: a
    collapsible history chatbot, two model-output boxes, an evaluation guide,
    vote buttons and a reasoning form. Only the "Clear" buttons and the
    "get sample" button are connected to handlers here.
    """
    task_names = ['Task 1', "Task 2", "Task 3"]
    criteria_names = ['Criterion 1', "Criterion 2", "Criterion 3"]

    def _cleared():
        # Update payload that resets a CheckboxGroup to an empty selection.
        return {"value": [], "__type__": "update"}

    with gradio.Row():
        with gradio.Column(scale=1):
            with gradio.Accordion("Choose models to sample from", open=False):
                selected_models = gradio.CheckboxGroup(MODEL_LIST, info="", value=MODEL_LIST, show_label=False, interactive=True)
                clear_button = gradio.Button("Clear", scale=1)
                clear_button.click(lambda: {selected_models: _cleared()}, inputs=[], outputs=[selected_models])

        with gradio.Column(scale=1):
            with gradio.Accordion("Choose task types to sample from", open=False):
                select_tasks = gradio.CheckboxGroup(list(task_names), info="", value=list(task_names), show_label=False, interactive=True)
                clear_task_button = gradio.Button("Clear", scale=1)
                clear_task_button.click(lambda: {select_tasks: _cleared()}, inputs=[], outputs=[select_tasks])
            btn_show_history = gradio.Button("Click to get sample and models' ouputs")

        with gradio.Column(scale=1):
            with gradio.Accordion("Choose criteria to sample", open=False):
                # One tab per task, each with its own criteria checkbox group.
                criteria_boxes = []
                with gradio.Row():
                    for task in task_names:
                        with gradio.Tab(task):
                            criteria_boxes.append(
                                gradio.CheckboxGroup(list(criteria_names), info="", value=list(criteria_names), show_label=False, interactive=True)
                            )

                clear_criteria_button = gradio.Button("Clear", scale=1)
                # A single click empties the criteria of all three tasks.
                clear_criteria_button.click(
                    lambda: {box: _cleared() for box in criteria_boxes},
                    inputs=[],
                    outputs=list(criteria_boxes),
                )

    # Placeholder sample shown when the "get sample" button is clicked.
    chatbot_data = [['hey', 'Hey!'], ["are we testing something?", None]]
    model_a_text, model_b_text = "Are we?", "Indeed we are."
    eval_text = '1. Twist it\n2. Bop it\n3. Crank it'

    with gradio.Accordion("History", open=False) as acc_history:
        with gradio.Row():
            # Empty columns on both sides of the chatbot column.
            with gradio.Column():
                pass
            # NOTE(review): Gradio documents `scale` as an integer; 0.8 is kept as written.
            with gradio.Column(scale=0.8):
                chatbot_history = gradio.Chatbot(container=True, elem_id="small")
            with gradio.Column():
                pass

    with gradio.Row():
        model_a = gradio.Text(label='Model A')
        model_b = gradio.Text(label='Model B')

    with gradio.Row():
        eval_guide = gradio.Text('Get samples and outputs in order to generate guide', label='Evaluation guide')

    # Vote buttons and the reasoning form are layout-only for now.
    with gradio.Row():
        both_good = gradio.Button('Both Good')
        left_better = gradio.Button('Left Better')
        right_better = gradio.Button('Right Better')
        both_bad = gradio.Button('Both Bad')

    with gradio.Row():
        with gradio.Column():
            reason = gradio.Textbox(label='Reasoning', placeholder='Put your reasoning here...', lines=5)
        with gradio.Column():
            gradio.Textbox(value='You have chosen: None', interactive=False, show_label=False)
            submit_next = gradio.Button("Submit your evaluation and get next")
            skip_next = gradio.Button("Skip this example and get next one")

    # Open the history accordion and fill every output with the placeholder sample.
    btn_show_history.click(
        lambda: (gradio.Accordion("History", open=True), chatbot_data, model_a_text, model_b_text, eval_text),
        inputs=[],
        outputs=[acc_history, chatbot_history, model_a, model_b, eval_guide],
    )


def build_demo():
    """Assemble the whole Gradio app and return it without launching.

    The page consists of the HTML banner, the header markdown and four tabs:
    leaderboard, side-by-side evaluation, live model arena and the "about"
    text. ``HEADER_MD`` and ``ABOUT_MD`` are read from module globals, so they
    should be populated before this is called.

    Returns:
        The constructed ``gradio.Blocks`` application.
    """
    theme = gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg)
    with gradio.Blocks(theme=theme, css=css) as demo:
        gradio.HTML(BANNER, elem_id="banner")
        gradio.Markdown(HEADER_MD)
        with gradio.Tabs():
            with gradio.TabItem("🐼 MERA leaderboard", id=0):
                tab_leaderboard()

            # The next three titles previously contained mojibake (emoji
            # bytes decoded with the wrong codec); the emoji are restored.
            with gradio.TabItem("🆚 SBS by categories and criteria", id=1):
                tab_offline_arena()

            with gradio.TabItem("🥊 Model arena", id=2):
                tab_online_arena()

            with gradio.TabItem("💪 About MERA", id=3):
                gradio.Markdown(ABOUT_MD)
    return demo

if __name__ == "__main__":
    # Script entry point: parse CLI flags, load the markdown texts, create the
    # S3 client (best effort) and launch the Gradio app.
    parser = argparse.ArgumentParser()
    parser.add_argument("--share", action="store_true")
    # TODO: optional "--bench_table" argument pointing at the MERA table
    # (previous default: "data_dir/MERA_jun2024.jsonl").
    args = parser.parse_args()

    # Read as UTF-8 explicitly so non-ASCII markdown does not depend on the
    # platform's default encoding.
    with open("header.md", "r", encoding="utf-8") as f:
        HEADER_MD = f.read()

    with open("about.md", "r", encoding="utf-8") as f:
        ABOUT_MD = f.read()

    try:
        session = boto3.session.Session()
        S3_SESSION = session.client(
            service_name='s3',
            endpoint_url=os.getenv('S3_ENDPOINT'),
            aws_access_key_id=os.getenv('S3_ACCESS_KEY'),
            aws_secret_access_key=os.getenv('S3_SECRET_KEY'),
        )
    except Exception as exc:
        # Best effort: the app still starts without S3. Catching Exception
        # (not a bare except) keeps Ctrl+C and SystemExit working, and the
        # reason for the failure is reported instead of being dropped.
        S3_SESSION = None
        print(f'Failed to start s3 session: {exc}')

    app = build_demo()
    # NOTE(review): allowed_paths=["."] permits Gradio to serve files from the
    # current working directory — confirm this exposure is intended.
    app.launch(share=args.share, height=3000, width="110%", allowed_paths=["."])