Updated

Files changed:

- app.py (+309 −29)
- contents.py (+53 −0)
- requirements.txt (+3 −1)
- style.py (+19 −0)
- utils.py (+110 −0)
app.py
CHANGED
@@ -1,39 +1,319 @@

```python
import json
import os

import gradio as gr
import spaces
from contents import (
    citation,
    description,
    examples,
    how_it_works,
    how_to_use,
    subtitle,
    title,
)
from gradio_highlightedtextbox import HighlightedTextbox
from style import custom_css
from utils import get_tuples_from_output

from inseq import list_feature_attribution_methods, list_step_functions
from inseq.commands.attribute_context.attribute_context import (
    AttributeContextArgs,
    attribute_context,
)


@spaces.GPU()
def pecore(
    input_current_text: str,
    input_context_text: str,
    output_current_text: str,
    output_context_text: str,
    model_name_or_path: str,
    attribution_method: str,
    attributed_fn: str | None,
    context_sensitivity_metric: str,
    context_sensitivity_std_threshold: float,
    context_sensitivity_topk: int,
    attribution_std_threshold: float,
    attribution_topk: int,
    input_template: str,
    input_current_text_template: str,
    output_template: str,
    special_tokens_to_keep: str | list[str] | None,
    model_kwargs: str,
    tokenizer_kwargs: str,
    generation_kwargs: str,
    attribution_kwargs: str,
):
    """Run PECoRe end-to-end and return highlight tuples plus download button updates."""
    formatted_input_current_text = input_current_text_template.format(
        current=input_current_text
    )
    pecore_args = AttributeContextArgs(
        show_intermediate_outputs=False,
        save_path=os.path.join(os.path.dirname(__file__), "outputs/output.json"),
        add_output_info=True,
        viz_path=os.path.join(os.path.dirname(__file__), "outputs/output.html"),
        show_viz=False,
        model_name_or_path=model_name_or_path,
        attribution_method=attribution_method,
        attributed_fn=attributed_fn,
        attribution_selectors=None,
        attribution_aggregators=None,
        normalize_attributions=True,
        model_kwargs=json.loads(model_kwargs),
        tokenizer_kwargs=json.loads(tokenizer_kwargs),
        generation_kwargs=json.loads(generation_kwargs),
        attribution_kwargs=json.loads(attribution_kwargs),
        context_sensitivity_metric=context_sensitivity_metric,
        align_output_context_auto=False,
        prompt_user_for_contextless_output_next_tokens=False,
        special_tokens_to_keep=special_tokens_to_keep,
        context_sensitivity_std_threshold=context_sensitivity_std_threshold,
        context_sensitivity_topk=context_sensitivity_topk
        if context_sensitivity_topk > 0
        else None,
        attribution_std_threshold=attribution_std_threshold,
        attribution_topk=attribution_topk if attribution_topk > 0 else None,
        input_current_text=formatted_input_current_text,
        input_context_text=input_context_text if input_context_text else None,
        input_template=input_template,
        output_current_text=output_current_text if output_current_text else None,
        output_context_text=output_context_text if output_context_text else None,
        output_template=output_template,
    )
    out = attribute_context(pecore_args)
    # Reveal the download buttons once outputs have been written to disk.
    return get_tuples_from_output(out), gr.Button(visible=True), gr.Button(visible=True)


with gr.Blocks(css=custom_css) as demo:
    gr.Markdown(title)
    gr.Markdown(subtitle)
    gr.Markdown(description)
    with gr.Tab("🔍 Attributing Context"):
        with gr.Row():
            with gr.Column():
                input_current_text = gr.Textbox(
                    label="Input query", placeholder="Your input query..."
                )
                input_context_text = gr.Textbox(
                    label="Input context", lines=4, placeholder="Your input context..."
                )
                attribute_input_button = gr.Button("Submit", variant="primary")
            with gr.Column():
                pecore_output_highlights = HighlightedTextbox(
                    value=[
                        ("This output will contain ", None),
                        ("context sensitive", "Context sensitive"),
                        (" generated tokens and ", None),
                        ("influential context", "Influential context"),
                        (" tokens.", None),
                    ],
                    color_map={
                        "Context sensitive": "green",
                        "Influential context": "blue",
                    },
                    show_legend=True,
                    label="PECoRe Output",
                    combine_adjacent=True,
                    interactive=False,
                )
                with gr.Row(equal_height=True):
                    download_output_file_button = gr.Button(
                        "⇓ Download output",
                        visible=False,
                        link=os.path.join(
                            os.path.dirname(__file__), "/file=outputs/output.json"
                        ),
                    )
                    download_output_html_button = gr.Button(
                        "🌐 Download HTML",
                        visible=False,
                        link=os.path.join(
                            os.path.dirname(__file__), "/file=outputs/output.html"
                        ),
                    )

        attribute_input_examples = gr.Examples(
            examples,
            inputs=[input_current_text, input_context_text],
            outputs=pecore_output_highlights,
        )
    with gr.Tab("⚙️ Parameters"):
        gr.Markdown("## ⚙️ PECoRe Parameters")
        with gr.Row(equal_height=True):
            model_name_or_path = gr.Textbox(
                value="gsarti/cora_mgen",
                label="Model",
                info="Hugging Face Hub identifier of the model to analyze with PECoRe.",
                interactive=True,
            )
            context_sensitivity_metric = gr.Dropdown(
                value="kl_divergence",
                label="Context sensitivity metric",
                info="Metric used to measure the context sensitivity of generated tokens.",
                choices=list_step_functions(),
                interactive=True,
            )
            attribution_method = gr.Dropdown(
                value="saliency",
                label="Attribution method",
                info="Attribution method used to identify relevant context tokens.",
                choices=list_feature_attribution_methods(),
                interactive=True,
            )
            attributed_fn = gr.Dropdown(
                value="contrast_prob_diff",
                label="Attributed function",
                info="Function of model logits to use as target for the attribution method.",
                choices=list_step_functions(),
                interactive=True,
            )
        gr.Markdown("#### Results Selection Parameters")
        with gr.Row(equal_height=True):
            context_sensitivity_std_threshold = gr.Number(
                value=1.0,
                label="Context sensitivity threshold",
                info="Select N to keep context-sensitive tokens with scores above N * std. 0 = above mean.",
                precision=1,
                minimum=0.0,
                maximum=5.0,
                step=0.5,
                interactive=True,
            )
            context_sensitivity_topk = gr.Number(
                value=0,
                label="Context sensitivity top-k",
                info="Select N to keep the top N context-sensitive tokens. 0 = keep all.",
                interactive=True,
                precision=0,
                minimum=0,
                maximum=10,
            )
            attribution_std_threshold = gr.Number(
                value=1.0,
                label="Attribution threshold",
                info="Select N to keep attributed tokens with scores above N * std. 0 = above mean.",
                precision=1,
                minimum=0.0,
                maximum=5.0,
                step=0.5,
                interactive=True,
            )
            attribution_topk = gr.Number(
                value=0,
                label="Attribution top-k",
                info="Select N to keep the top N attributed tokens in the context. 0 = keep all.",
                interactive=True,
                precision=0,
                minimum=0,
                maximum=50,
            )

        gr.Markdown("#### Text Format Parameters")
        with gr.Row(equal_height=True):
            input_template = gr.Textbox(
                value="{current} <P>:{context}",
                label="Input template",
                info="Template used to format the input for the model. Use {current} and {context} placeholders.",
                interactive=True,
            )
            output_template = gr.Textbox(
                value="{current}",
                label="Output template",
                info="Template used to format the output from the model. Use {current} and {context} placeholders.",
                interactive=True,
            )
            input_current_text_template = gr.Textbox(
                value="<Q>:{current}",
                label="Input current text template",
                info="Template used to format the input query for the model. Use the {current} placeholder.",
                interactive=True,
            )
            special_tokens_to_keep = gr.Dropdown(
                label="Special tokens to keep",
                info="Special tokens to keep in the attribution. If empty, all special tokens are ignored.",
                value=None,
                multiselect=True,
                allow_custom_value=True,
            )

        gr.Markdown("## ⚙️ Generation Parameters")
        with gr.Row(equal_height=True):
            output_current_text = gr.Textbox(
                label="Generation output",
                info="Specifies an output to be force-decoded during generation. If blank, the model generates freely.",
                interactive=True,
            )
            output_context_text = gr.Textbox(
                label="Generation context",
                info="If specified, this context is used as the starting point for generation. Useful e.g. for chain-of-thought reasoning.",
                interactive=True,
            )
            generation_kwargs = gr.Code(
                value="{}",
                language="json",
                label="Generation kwargs",
                interactive=True,
                lines=1,
            )
        gr.Markdown("## ⚙️ Other Parameters")
        with gr.Row(equal_height=True):
            model_kwargs = gr.Code(
                value="{}",
                language="json",
                label="Model kwargs",
                interactive=True,
                lines=1,
            )
            tokenizer_kwargs = gr.Code(
                value="{}",
                language="json",
                label="Tokenizer kwargs",
                interactive=True,
                lines=1,
            )
            attribution_kwargs = gr.Code(
                value="{}",
                language="json",
                label="Attribution kwargs",
                interactive=True,
                lines=1,
            )

    gr.Markdown(how_it_works)
    gr.Markdown(how_to_use)
    gr.Markdown(citation)

    attribute_input_button.click(
        pecore,
        inputs=[
            input_current_text,
            input_context_text,
            output_current_text,
            output_context_text,
            model_name_or_path,
            attribution_method,
            attributed_fn,
            context_sensitivity_metric,
            context_sensitivity_std_threshold,
            context_sensitivity_topk,
            attribution_std_threshold,
            attribution_topk,
            input_template,
            input_current_text_template,
            output_template,
            special_tokens_to_keep,
            model_kwargs,
            tokenizer_kwargs,
            generation_kwargs,
            attribution_kwargs,
        ],
        outputs=[
            pecore_output_highlights,
            download_output_file_button,
            download_output_html_button,
        ],
    )

demo.launch(allowed_paths=["outputs/"])
```
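For reference, the `pecore` handler above is a thin wrapper around inseq's `attribute_context` entry point. Below is a minimal headless sketch of the same call, reusing the demo's default UI values (model `gsarti/cora_mgen`, `saliency` attribution, `kl_divergence` sensitivity metric, and the templates from the Parameters tab); any field not listed is assumed to keep its library default in the pinned inseq revision, and the exact set of required fields may differ across versions.

```python
# Minimal sketch of the call app.py performs, with values mirroring the demo
# defaults above. Fields not listed are assumed to keep their inseq defaults.
from inseq.commands.attribute_context.attribute_context import (
    AttributeContextArgs,
    attribute_context,
)

args = AttributeContextArgs(
    model_name_or_path="gsarti/cora_mgen",
    attribution_method="saliency",
    attributed_fn="contrast_prob_diff",
    context_sensitivity_metric="kl_divergence",
    input_template="{current} <P>:{context}",
    output_template="{current}",
    input_current_text="<Q>:When was Banff National Park established?",
    input_context_text="Banff National Park is Canada's oldest national park, established in 1885.",
    show_viz=False,
)
out = attribute_context(args)  # AttributeContextOutput with per-token CCI scores
```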
contents.py
ADDED
@@ -0,0 +1,53 @@

```python
title = "<h1 class='demo-title'>🐑 Plausibility Evaluation of Context Reliance (PECoRe) 🐑</h1>"

subtitle = "<h2 class='demo-subtitle'>An Interpretability Framework to Detect and Attribute Context Reliance in Language Models</h2>"

description = """
Given a query and a context passed as inputs to an LM, PECoRe identifies which tokens in the generated
response were dependent on context, and matches them with the context tokens contributing to their prediction.
For more information, check out our <a href="https://openreview.net/forum?id=XTHfNGI3zT" target='_blank'>ICLR 2024 paper</a>.
"""

how_it_works = r"""
<details>
<summary><h3 class="summary-label">⚙️ How Does It Work?</h3></summary>
<br/>
PECoRe uses a contrastive approach to attribute context reliance in language models.
It compares the model's predictions when the context is present and when it is absent, and attributes the difference in predictions to the context tokens.
</details>
"""

how_to_use = r"""
<details>
<summary><h3 class="summary-label">🔧 How to Use PECoRe</h3></summary>

</details>
"""

citation = r"""
<details>
<summary><h3 class="summary-label">📜 Citing PECoRe</h3></summary>

@inproceedings{sarti-etal-2023-quantifying,
    title = "Quantifying the Plausibility of Context Reliance in Neural Machine Translation",
    author = "Sarti, Gabriele and
      Chrupa{\l}a, Grzegorz and
      Nissim, Malvina and
      Bisazza, Arianna",
    booktitle = "The Twelfth International Conference on Learning Representations (ICLR 2024)",
    month = may,
    year = "2024",
    address = "Vienna, Austria",
    publisher = "OpenReview",
    url = "https://openreview.net/forum?id=XTHfNGI3zT"
}

</details>
"""

examples = [
    [
        "When was Banff National Park established?",
        "Banff National Park is Canada's oldest national park, established in 1885 as Rocky Mountains Park. Located in Alberta's Rocky Mountains, 110–180 kilometres (68–112 mi) west of Calgary, Banff encompasses 6,641 square kilometres (2,564 sq mi) of mountainous terrain.",
    ]
]
```
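The contrastive procedure summarized in `how_it_works` can be made concrete with a toy computation: score a generated token's context sensitivity by comparing the model's next-token distributions with and without the context. The sketch below only illustrates the idea behind the demo's default `kl_divergence` metric; it is not inseq's actual implementation.

```python
# Toy illustration of the contrastive idea: a generated token is context
# sensitive to the extent that the model's next-token distribution changes
# when the context is removed, measured here via KL divergence.
import torch
import torch.nn.functional as F


def kl_context_sensitivity(
    logits_with_ctx: torch.Tensor, logits_without_ctx: torch.Tensor
) -> float:
    p = F.log_softmax(logits_with_ctx, dim=-1)   # contextual prediction (log P)
    q = F.log_softmax(logits_without_ctx, dim=-1)  # contextless prediction (log Q)
    # KL(P || Q): how much the contextual distribution diverges from the contextless one
    return F.kl_div(q, p, reduction="sum", log_target=True).item()


logits_ctx = torch.randn(32000)     # placeholder next-token logits with context
logits_no_ctx = torch.randn(32000)  # placeholder next-token logits without context
score = kl_context_sensitivity(logits_ctx, logits_no_ctx)
```

Tokens whose score stands out, e.g. above `context_sensitivity_std_threshold` standard deviations from the mean as in app.py, are the ones flagged as context sensitive.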
requirements.txt
CHANGED
@@ -1 +1,3 @@

```
spaces
git+https://github.com/inseq-team/inseq.git@main
gradio_highlightedtextbox
```
style.py
ADDED
@@ -0,0 +1,19 @@

```python
custom_css = """
.demo-title {
    text-align: center;
    display: block;
    margin-bottom: 0;
    font-size: 2em;
}

.demo-subtitle {
    text-align: center;
    display: block;
    margin-top: 0;
    font-size: 1.5em;
}

.summary-label {
    display: inline;
}
"""
```
utils.py
ADDED
@@ -0,0 +1,110 @@

```python
from copy import deepcopy
from typing import Optional

from inseq import load_model
from inseq.commands.attribute_context.attribute_context_args import AttributeContextArgs
from inseq.commands.attribute_context.attribute_context_helpers import (
    AttributeContextOutput,
    filter_rank_tokens,
    get_filtered_tokens,
)
from inseq.models import HuggingfaceModel


def get_formatted_attribute_context_results(
    model: HuggingfaceModel,
    args: AttributeContextArgs,
    output: AttributeContextOutput,
) -> list[tuple[str, Optional[str]]]:
    """Format the results of the context attribution process as (text, label) tuples."""

    def format_context_comment(
        model: HuggingfaceModel,
        has_other_context: bool,
        special_tokens_to_keep: list[str],
        context: str,
        context_scores: list[float],
        other_context_scores: Optional[list[float]] = None,
        is_target: bool = False,
    ) -> list[tuple[str, Optional[str]]]:
        context_tokens = get_filtered_tokens(
            context,
            model,
            special_tokens_to_keep,
            replace_special_characters=True,
            is_target=is_target,
        )
        context_token_tuples = [(t, None) for t in context_tokens]
        # Copy before merging so the caller's score list is not mutated in place.
        scores = list(context_scores)
        if has_other_context:
            scores += other_context_scores
        context_ranked_tokens, _ = filter_rank_tokens(
            tokens=context_tokens,
            scores=scores,
            std_threshold=args.attribution_std_threshold,
            topk=args.attribution_topk,
        )
        for idx, _, tok in context_ranked_tokens:
            context_token_tuples[idx] = (tok, "Influential context")
        return context_token_tuples

    out = []
    output_current_tokens = get_filtered_tokens(
        output.output_current,
        model,
        args.special_tokens_to_keep,
        replace_special_characters=True,
        is_target=True,
    )
    for example_idx, cci_out in enumerate(output.cci_scores, start=1):
        curr_output_tokens = [(t, None) for t in output_current_tokens]
        cti_idx = cci_out.cti_idx
        curr_output_tokens[cti_idx] = (
            curr_output_tokens[cti_idx][0],
            "Context sensitive",
        )
        if args.has_input_context:
            input_context_tokens = format_context_comment(
                model,
                args.has_output_context,
                args.special_tokens_to_keep,
                output.input_context,
                cci_out.input_context_scores,
                cci_out.output_context_scores,
            )
        if args.has_output_context:
            output_context_tokens = format_context_comment(
                model,
                args.has_input_context,
                args.special_tokens_to_keep,
                output.output_context,
                cci_out.output_context_scores,
                cci_out.input_context_scores,
                is_target=True,
            )
        out += [
            ("\n\n" if example_idx > 1 else "", None),
            (
                f"#{example_idx}.\nGenerated output:\t",
                None,
            ),
        ]
        out += curr_output_tokens
        if args.has_input_context:
            out += [("\nInput context:\t", None)]
            out += input_context_tokens
        if args.has_output_context:
            out += [("\nOutput context:\t", None)]
            out += output_context_tokens
    return out


def get_tuples_from_output(output: AttributeContextOutput):
    model = load_model(
        output.info.model_name_or_path,
        output.info.attribution_method,
        model_kwargs=deepcopy(output.info.model_kwargs),
        tokenizer_kwargs=deepcopy(output.info.tokenizer_kwargs),
    )
    return get_formatted_attribute_context_results(model, output.info, output)
```
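For clarity, `get_tuples_from_output` returns the flat list of `(text, label)` pairs that the `HighlightedTextbox` in app.py consumes, with labels drawn from `None`, `"Context sensitive"`, and `"Influential context"`. A hypothetical (hand-written, not model-generated) return value might look like:

```python
# Hypothetical example of the (text, label) pairs produced above; only the
# label vocabulary and overall layout reflect the actual code.
example_tuples = [
    ("#1.\nGenerated output:\t", None),
    ("The park was established in ", None),
    ("1885", "Context sensitive"),
    (".", None),
    ("\nInput context:\t", None),
    ("Banff National Park is Canada's oldest national park, established in ", None),
    ("1885", "Influential context"),
    (" as Rocky Mountains Park.", None),
]
```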