Spaces:

gsarti
/

pecore

Running on Zero

App Files Files Community

gsarti committed on Feb 8

Commit

2b66ced

•

1 Parent(s): 45e7dbd

Add presets and model preloading

Browse files

Files changed (4) hide show

app.py +241 -58
contents.py +1 -1
presets.py +58 -0
utils.py +1 -14

app.py CHANGED Viewed

@@ -13,14 +13,25 @@ from contents import (
     title,
 )
 from gradio_highlightedtextbox import HighlightedTextbox
 from style import custom_css
-from utils import get_tuples_from_output
-from inseq import list_feature_attribution_methods, list_step_functions
 from inseq.commands.attribute_context.attribute_context import (
     AttributeContextArgs,
-    attribute_context,
 )
 @spaces.GPU()
@@ -38,17 +49,41 @@ def pecore(
     attribution_std_threshold: float,
     attribution_topk: int,
     input_template: str,
-    input_current_text_template: str,
     output_template: str,
     special_tokens_to_keep: str | list[str] | None,
     model_kwargs: str,
     tokenizer_kwargs: str,
     generation_kwargs: str,
     attribution_kwargs: str,
 ):
-    formatted_input_current_text = input_current_text_template.format(
-        current=input_current_text
-    )
     pecore_args = AttributeContextArgs(
         show_intermediate_outputs=False,
         save_path=os.path.join(os.path.dirname(__file__), "outputs/output.json"),
@@ -66,24 +101,41 @@ def pecore(
         generation_kwargs=json.loads(generation_kwargs),
         attribution_kwargs=json.loads(attribution_kwargs),
         context_sensitivity_metric=context_sensitivity_metric,
-        align_output_context_auto=False,
         prompt_user_for_contextless_output_next_tokens=False,
         special_tokens_to_keep=special_tokens_to_keep,
         context_sensitivity_std_threshold=context_sensitivity_std_threshold,
-        context_sensitivity_topk=context_sensitivity_topk
-        if context_sensitivity_topk > 0
-        else None,
         attribution_std_threshold=attribution_std_threshold,
-        attribution_topk=attribution_topk if attribution_topk > 0 else None,
-        input_current_text=formatted_input_current_text,
-        input_context_text=input_context_text if input_context_text else None,
         input_template=input_template,
-        output_current_text=output_current_text if output_current_text else None,
-        output_context_text=output_context_text if output_context_text else None,
         output_template=output_template,
     )
-    out = attribute_context(pecore_args)
-    return get_tuples_from_output(out), gr.Button(visible=True), gr.Button(visible=True)
 with gr.Blocks(css=custom_css) as demo:
@@ -93,12 +145,12 @@ with gr.Blocks(css=custom_css) as demo:
     with gr.Tab("🐑 Attributing Context"):
         with gr.Row():
             with gr.Column():
-                input_current_text = gr.Textbox(
-                    label="Input query", placeholder="Your input query..."
-                )
                 input_context_text = gr.Textbox(
                     label="Input context", lines=4, placeholder="Your input context..."
                 )
                 attribute_input_button = gr.Button("Submit", variant="primary")
             with gr.Column():
                 pecore_output_highlights = HighlightedTextbox(
@@ -139,15 +191,57 @@ with gr.Blocks(css=custom_css) as demo:
             inputs=[input_current_text, input_context_text],
             outputs=pecore_output_highlights,
         )
-    with gr.Tab("⚙️ Parameters"):
         gr.Markdown("## ⚙️ PECoRe Parameters")
         with gr.Row(equal_height=True):
-            model_name_or_path = gr.Textbox(
-                value="gsarti/cora_mgen",
-                label="Model",
-                info="Hugging Face Hub identifier of the model to analyze with PECoRe.",
-                interactive=True,
-            )
             context_sensitivity_metric = gr.Dropdown(
                 value="kl_divergence",
                 label="Context sensitivity metric",
@@ -224,12 +318,13 @@ with gr.Blocks(css=custom_css) as demo:
                 info="Template to format the output from the model. Use {current} and {context} placeholders.",
                 interactive=True,
             )
-            input_current_text_template = gr.Textbox(
                 value="<Q>:{current}",
                 label="Input current text template",
                 info="Template to format the input query for the model. Use {current} placeholder.",
                 interactive=True,
             )
             special_tokens_to_keep = gr.Dropdown(
                 label="Special tokens to keep",
                 info="Special tokens to keep in the attribution. If empty, all special tokens are ignored.",
@@ -237,8 +332,28 @@ with gr.Blocks(css=custom_css) as demo:
                 multiselect=True,
                 allow_custom_value=True,
             )
         gr.Markdown("## ⚙️ Generation Parameters")
         with gr.Row(equal_height=True):
             output_current_text = gr.Textbox(
                 label="Generation output",
@@ -250,36 +365,37 @@ with gr.Blocks(css=custom_css) as demo:
                 info="If specified, this context is used as starting point for generation. Useful for e.g. chain-of-thought reasoning.",
                 interactive=True,
             )
-        generation_kwargs = gr.Code(
-            value="{}",
-            language="json",
-            label="Generation kwargs",
-            interactive=True,
-            lines=1,
-        )
         gr.Markdown("## ⚙️ Other Parameters")
         with gr.Row(equal_height=True):
-            model_kwargs = gr.Code(
-                value="{}",
-                language="json",
-                label="Model kwargs",
-                interactive=True,
-                lines=1,
-            )
-            tokenizer_kwargs = gr.Code(
-                value="{}",
-                language="json",
-                label="Tokenizer kwargs",
-                interactive=True,
-                lines=1,
-            )
-            attribution_kwargs = gr.Code(
-                value="{}",
-                language="json",
-                label="Attribution kwargs",
-                interactive=True,
-                lines=1,
-            )
     gr.Markdown(how_it_works)
     gr.Markdown(how_to_use)
@@ -301,9 +417,10 @@ with gr.Blocks(css=custom_css) as demo:
             attribution_std_threshold,
             attribution_topk,
             input_template,
-            input_current_text_template,
             output_template,
             special_tokens_to_keep,
             model_kwargs,
             tokenizer_kwargs,
             generation_kwargs,
@@ -316,4 +433,70 @@ with gr.Blocks(css=custom_css) as demo:
         ],
     )
 demo.launch(allowed_paths=["outputs/"])

     title,
 )
 from gradio_highlightedtextbox import HighlightedTextbox
+from presets import (
+    set_chatml_preset,
+    set_cora_preset,
+    set_default_preset,
+    set_mmt_preset,
+    set_towerinstruct_preset,
+    set_zephyr_preset,
+)
 from style import custom_css
+from utils import get_formatted_attribute_context_results
+from inseq import list_feature_attribution_methods, list_step_functions, load_model
 from inseq.commands.attribute_context.attribute_context import (
     AttributeContextArgs,
+    attribute_context_with_model,
 )
+from inseq.models import HuggingfaceModel
+loaded_model: HuggingfaceModel = None
 @spaces.GPU()
     attribution_std_threshold: float,
     attribution_topk: int,
     input_template: str,
+    contextless_input_current_text: str,
     output_template: str,
     special_tokens_to_keep: str | list[str] | None,
+    decoder_input_output_separator: str,
     model_kwargs: str,
     tokenizer_kwargs: str,
     generation_kwargs: str,
     attribution_kwargs: str,
 ):
+    global loaded_model
+    if "{context}" in output_template and not output_context_text:
+        raise gr.Error(
+            "Parameter 'Generated context' is required when using {context} in the output template."
+        )
+    if loaded_model is None or model_name_or_path != loaded_model.model_name:
+        gr.Info("Loading model...")
+        loaded_model = load_model(
+            model_name_or_path,
+            attribution_method,
+            model_kwargs=json.loads(model_kwargs),
+            tokenizer_kwargs=json.loads(tokenizer_kwargs),
+        )
+    kwargs = {}
+    if context_sensitivity_topk > 0:
+        kwargs["context_sensitivity_topk"] = context_sensitivity_topk
+    if attribution_topk > 0:
+        kwargs["attribution_topk"] = attribution_topk
+    if input_context_text:
+        kwargs["input_context_text"] = input_context_text
+    if output_context_text:
+        kwargs["output_context_text"] = output_context_text
+    if output_current_text:
+        kwargs["output_current_text"] = output_current_text
+    if decoder_input_output_separator:
+        kwargs["decoder_input_output_separator"] = decoder_input_output_separator
     pecore_args = AttributeContextArgs(
         show_intermediate_outputs=False,
         save_path=os.path.join(os.path.dirname(__file__), "outputs/output.json"),
         generation_kwargs=json.loads(generation_kwargs),
         attribution_kwargs=json.loads(attribution_kwargs),
         context_sensitivity_metric=context_sensitivity_metric,
         prompt_user_for_contextless_output_next_tokens=False,
         special_tokens_to_keep=special_tokens_to_keep,
         context_sensitivity_std_threshold=context_sensitivity_std_threshold,
         attribution_std_threshold=attribution_std_threshold,
+        input_current_text=input_current_text,
         input_template=input_template,
         output_template=output_template,
+        contextless_input_current_text=contextless_input_current_text,
+        handle_output_context_strategy="pre",
+        **kwargs,
     )
+    out = attribute_context_with_model(pecore_args, loaded_model)
+    tuples = get_formatted_attribute_context_results(loaded_model, out.info, out)
+    if not tuples:
+        msg = "Warning: No pairs were found by PECoRe. Try adjusting Results Selection parameters."
+        tuples = [(msg, None)]
+    return tuples, gr.Button(visible=True), gr.Button(visible=True)
+@spaces.GPU()
+def preload_model(
+    model_name_or_path: str,
+    attribution_method: str,
+    model_kwargs: str,
+    tokenizer_kwargs: str,
+):
+    global loaded_model
+    if loaded_model is None or model_name_or_path != loaded_model.model_name:
+        gr.Info("Loading model...")
+        loaded_model = load_model(
+            model_name_or_path,
+            attribution_method,
+            model_kwargs=json.loads(model_kwargs),
+            tokenizer_kwargs=json.loads(tokenizer_kwargs),
+        )
 with gr.Blocks(css=custom_css) as demo:
     with gr.Tab("🐑 Attributing Context"):
         with gr.Row():
             with gr.Column():
                 input_context_text = gr.Textbox(
                     label="Input context", lines=4, placeholder="Your input context..."
                 )
+                input_current_text = gr.Textbox(
+                    label="Input query", placeholder="Your input query..."
+                )
                 attribute_input_button = gr.Button("Submit", variant="primary")
             with gr.Column():
                 pecore_output_highlights = HighlightedTextbox(
             inputs=[input_current_text, input_context_text],
             outputs=pecore_output_highlights,
         )
+    with gr.Tab("⚙️ Parameters") as params_tab:
+        gr.Markdown("## ✨ Presets")
+        with gr.Row(equal_height=True):
+            with gr.Column():
+                default_preset = gr.Button("Default", variant="secondary")
+                gr.Markdown(
+                    "Default preset using templates without special tokens or parameters.\nCan be used with most decoder-only and encoder-decoder models."
+                )
+            with gr.Column():
+                cora_preset = gr.Button("CORA mQA", variant="secondary")
+                gr.Markdown(
+                    "Preset for the <a href='https://huggingface.co/gsarti/cora_mgen' target='_blank'>CORA Multilingual QA</a> model.\nUses special templates for inputs."
+                )
+            with gr.Column():
+                zephyr_preset = gr.Button("Zephyr Template", variant="secondary")
+                gr.Markdown(
+                    "Preset for models using the <a href='https://huggingface.co/HuggingFaceH4/zephyr-7b-beta' target='_blank'>Zephyr conversational template</a>.\nUses <code><|system|></code>, <code><|user|></code> and <code><|assistant|></code> special tokens."
+                )
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=1):
+                multilingual_mt_template = gr.Button(
+                    "Multilingual MT", variant="secondary"
+                )
+                gr.Markdown(
+                    "Preset for multilingual MT models such as <a href='https://huggingface.co/facebook/nllb-200-distilled-600M' target='_blank'>NLLB</a> and <a href='https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt' target='_blank'>mBART</a> using language tags."
+                )
+            with gr.Column(scale=1):
+                chatml_template = gr.Button("ChatML Template", variant="secondary")
+                gr.Markdown(
+                    "Preset for models using the <a href='https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/ai-services/openai/includes/chat-markup-language.md' target='_blank'>ChatML conversational template</a>.\nUses <code><|im_start|></code>, <code><|im_end|></code> special tokens."
+                )
+            with gr.Column(scale=1):
+                towerinstruct_template = gr.Button(
+                    "Unbabel TowerInstruct", variant="secondary"
+                )
+                gr.Markdown(
+                    "Preset for models using the <a href='https://huggingface.co/Unbabel/TowerInstruct-7B-v0.1' target='_blank'>Unbabel TowerInstruct</a> conversational template.\nUses <code><|im_start|></code>, <code><|im_end|></code> special tokens."
+                )
         gr.Markdown("## ⚙️ PECoRe Parameters")
         with gr.Row(equal_height=True):
+            with gr.Column():
+                model_name_or_path = gr.Textbox(
+                    value="gpt2",
+                    label="Model",
+                    info="Hugging Face Hub identifier of the model to analyze with PECoRe.",
+                    interactive=True,
+                )
+                load_model_button = gr.Button(
+                    "Load model",
+                    variant="secondary",
+                )
             context_sensitivity_metric = gr.Dropdown(
                 value="kl_divergence",
                 label="Context sensitivity metric",
                 info="Template to format the output from the model. Use {current} and {context} placeholders.",
                 interactive=True,
             )
+            contextless_input_current_text = gr.Textbox(
                 value="<Q>:{current}",
                 label="Input current text template",
                 info="Template to format the input query for the model. Use {current} placeholder.",
                 interactive=True,
             )
+        with gr.Row(equal_height=True):
             special_tokens_to_keep = gr.Dropdown(
                 label="Special tokens to keep",
                 info="Special tokens to keep in the attribution. If empty, all special tokens are ignored.",
                 multiselect=True,
                 allow_custom_value=True,
             )
+            decoder_input_output_separator = gr.Textbox(
+                label="Decoder input/output separator",
+                info="Separator to use between input and output in the decoder input.",
+                value="",
+                interactive=True,
+                lines=1,
+            )
         gr.Markdown("## ⚙️ Generation Parameters")
+        with gr.Row(equal_height=True):
+            with gr.Column(scale=0.5):
+                gr.Markdown(
+                    "The following arguments can be used to control generation parameters and force specific model outputs."
+                )
+            with gr.Column(scale=1):
+                generation_kwargs = gr.Code(
+                    value="{}",
+                    language="json",
+                    label="Generation kwargs (JSON)",
+                    interactive=True,
+                    lines=1,
+                )
         with gr.Row(equal_height=True):
             output_current_text = gr.Textbox(
                 label="Generation output",
                 info="If specified, this context is used as starting point for generation. Useful for e.g. chain-of-thought reasoning.",
                 interactive=True,
             )
         gr.Markdown("## ⚙️ Other Parameters")
         with gr.Row(equal_height=True):
+            with gr.Column():
+                gr.Markdown(
+                    "The following arguments will be passed to initialize the Hugging Face model and tokenizer, and to the `inseq_model.attribute` method."
+                )
+            with gr.Column():
+                model_kwargs = gr.Code(
+                    value="{}",
+                    language="json",
+                    label="Model kwargs (JSON)",
+                    interactive=True,
+                    lines=1,
+                    min_width=160,
+                )
+            with gr.Column():
+                tokenizer_kwargs = gr.Code(
+                    value="{}",
+                    language="json",
+                    label="Tokenizer kwargs (JSON)",
+                    interactive=True,
+                    lines=1,
+                )
+            with gr.Column():
+                attribution_kwargs = gr.Code(
+                    value="{}",
+                    language="json",
+                    label="Attribution kwargs (JSON)",
+                    interactive=True,
+                    lines=1,
+                )
     gr.Markdown(how_it_works)
     gr.Markdown(how_to_use)
             attribution_std_threshold,
             attribution_topk,
             input_template,
+            contextless_input_current_text,
             output_template,
             special_tokens_to_keep,
+            decoder_input_output_separator,
             model_kwargs,
             tokenizer_kwargs,
             generation_kwargs,
         ],
     )
+    load_model_button.click(
+        preload_model,
+        inputs=[model_name_or_path, attribution_method, model_kwargs, tokenizer_kwargs],
+        outputs=[],
+    )
+    # Preset params
+    outputs_to_reset = [
+        model_name_or_path,
+        input_template,
+        contextless_input_current_text,
+        output_template,
+        special_tokens_to_keep,
+        decoder_input_output_separator,
+        model_kwargs,
+        tokenizer_kwargs,
+        generation_kwargs,
+        attribution_kwargs,
+    ]
+    reset_kwargs = {
+        "fn": set_default_preset,
+        "inputs": None,
+        "outputs": outputs_to_reset,
+    }
+    # Presets
+    default_preset.click(**reset_kwargs)
+    cora_preset.click(**reset_kwargs).then(
+        set_cora_preset,
+        outputs=[model_name_or_path, input_template, contextless_input_current_text],
+    )
+    zephyr_preset.click(**reset_kwargs).then(
+        set_zephyr_preset,
+        outputs=[
+            model_name_or_path,
+            input_template,
+            contextless_input_current_text,
+            decoder_input_output_separator,
+        ],
+    )
+    multilingual_mt_template.click(**reset_kwargs).then(
+        set_mmt_preset,
+        outputs=[model_name_or_path, input_template, output_template, tokenizer_kwargs],
+    )
+    chatml_template.click(**reset_kwargs).then(
+        set_chatml_preset,
+        outputs=[
+            model_name_or_path,
+            input_template,
+            contextless_input_current_text,
+            decoder_input_output_separator,
+            special_tokens_to_keep,
+        ],
+    )
+    towerinstruct_template.click(**reset_kwargs).then(
+        set_towerinstruct_preset,
+        outputs=[
+            model_name_or_path,
+            input_template,
+            contextless_input_current_text,
+            decoder_input_output_separator,
+        ],
+    )
 demo.launch(allowed_paths=["outputs/"])

contents.py CHANGED Viewed

@@ -48,6 +48,6 @@ citation = r"""
 examples = [
     [
         "When was Banff National Park established?",
-        "Banff National Park is Canada's oldest national park, established in 1885 as Rocky Mountains Park. Located in Alberta's Rocky Mountains, 110–180 kilometres (68–112 mi) west of Calgary, Banff encompasses 6,641 square kilometres (2,564 sq mi) of mountainous terrain.",
     ]
 ]

 examples = [
     [
         "When was Banff National Park established?",
+        "Banff National Park is Canada's oldest national park, established in 1885 as Rocky Mountains Park. Located in Alberta's Rocky Mountains, 110-180 kilometres (68-112 mi) west of Calgary, Banff encompasses 6,641 square kilometres (2,564 sq mi) of mountainous terrain.",
     ]
 ]

presets.py ADDED Viewed

	@@ -0,0 +1,58 @@

+def set_cora_preset():
+    return (
+        "gsarti/cora_mgen",  # model_name_or_path
+        "<Q>:{current} <P>:{context}",  # input_template
+        "<Q>:{current}",  # input_current_text_template
+    )
+def set_default_preset():
+    return (
+        "gpt2",  # model_name_or_path
+        "{current} {context}",  # input_template
+        "{current}",  # input_current_text_template
+        "{current}",  # output_template
+        [],  # special_tokens_to_keep
+        "",  # decoder_input_output_separator
+        "{}",  # model_kwargs
+        "{}",  # tokenizer_kwargs
+        "{}",  # generation_kwargs
+        "{}",  # attribution_kwargs
+    )
+def set_zephyr_preset():
+    return (
+        "stabilityai/stablelm-2-zephyr-1_6b",  # model_name_or_path
+        "<|system|>\n{context}</s>\n<|user|>\n{current}</s>\n<|assistant|>\n",  # input_template
+        "<|user|>\n{current}</s>\n<|assistant|>\n",  # input_current_text_template
+        "\n",  # decoder_input_output_separator
+    )
+def set_chatml_preset():
+    return (
+        "Qwen/Qwen1.5-0.5B-Chat",  # model_name_or_path
+        "<|im_start|>system\n{context}<|im_end|>\n<|im_start|>user\n{current}<|im_end|>\n<|im_start|>assistant\n",  # input_template
+        "<|im_start|>user\n{current}<|im_end|>\n<|im_start|>assistant\n",  # input_current_text_template
+        "",  # decoder_input_output_separator
+        ["<|im_start|>", "<|im_end|>"],  # special_tokens_to_keep
+    )
+def set_mmt_preset():
+    return (
+        "facebook/mbart-large-50-one-to-many-mmt",  # model_name_or_path
+        "{context} {current}",  # input_template
+        "{context} {current}",  # output_template
+        '{\n\t"src_lang": "en_XX",\n\t"tgt_lang": "fr_XX"\n}',  # tokenizer_kwargs
+    )
+def set_towerinstruct_preset():
+    return (
+        "Unbabel/TowerInstruct-7B-v0.1",  # model_name_or_path
+        "<|im_start|>user\nSource: {current}\nContext: {context}\nTranslate the above text into French. Use the context to guide your answer.\nTarget:<|im_end|>\n<|im_start|>assistant\n",  # input_template
+        "<|im_start|>user\nSource: {current}\nTranslate the above text into French.\nTarget:<|im_end|>\n<|im_start|>assistant\n",  # input_current_text_template
+        "",  # decoder_input_output_separator
+    )

utils.py CHANGED Viewed

@@ -1,7 +1,5 @@
-from copy import deepcopy
 from typing import Optional
-from inseq import load_model
 from inseq.commands.attribute_context.attribute_context_args import AttributeContextArgs
 from inseq.commands.attribute_context.attribute_context_helpers import (
     AttributeContextOutput,
@@ -81,7 +79,6 @@ def get_formatted_attribute_context_results(
                 cci_out.output_context_scores,
                 cci_out.input_context_scores,
                 is_target=True,
-                context_type="Output",
             )
         out += [
             ("\n\n" if example_idx > 1 else "", None),
@@ -95,16 +92,6 @@ def get_formatted_attribute_context_results(
             out += [("\nInput context:\t", None)]
             out += input_context_tokens
         if args.has_output_context:
-            out += [("\\Output context:\t", None)]
             out += output_context_tokens
     return out
-def get_tuples_from_output(output: AttributeContextOutput):
-    model = load_model(
-        output.info.model_name_or_path,
-        output.info.attribution_method,
-        model_kwargs=deepcopy(output.info.model_kwargs),
-        tokenizer_kwargs=deepcopy(output.info.tokenizer_kwargs),
-    )
-    return get_formatted_attribute_context_results(model, output.info, output)

 from typing import Optional
 from inseq.commands.attribute_context.attribute_context_args import AttributeContextArgs
 from inseq.commands.attribute_context.attribute_context_helpers import (
     AttributeContextOutput,
                 cci_out.output_context_scores,
                 cci_out.input_context_scores,
                 is_target=True,
             )
         out += [
             ("\n\n" if example_idx > 1 else "", None),
             out += [("\nInput context:\t", None)]
             out += input_context_tokens
         if args.has_output_context:
+            out += [("\nOutput context:\t", None)]
             out += output_context_tokens
     return out