philipp-zettl commited on
Commit
1ccde3b
1 Parent(s): ddc0abc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -63
app.py CHANGED
@@ -158,6 +158,7 @@ def find_best_parameters(eval_data, model, tokenizer, max_length=85):
158
  4: [2],
159
  6: [2], # 6x3 == 4x2
160
  8: [2], # 8x4 == 6x3 == 4x2
 
161
  10: [2], # 10x5 == 8x4 == 6x3 == 4x2
162
  }
163
 
@@ -249,7 +250,9 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
249
  )
250
 
251
  if optimize_questions:
252
- q_params = find_best_parameters(list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length)
 
 
253
 
254
  question = run_model(
255
  inputs,
@@ -308,69 +311,89 @@ def create_file_download(qnas):
308
  return 'qnas.tsv'
309
 
310
 
311
- with gr.Blocks(css='.hidden_input {display: none;}') as demo:
312
- with gr.Row(equal_height=True):
313
- gr.Markdown(
314
- """
315
- # QA-Generator
316
- A combination of fine-tuned flan-T5(-small) models chained into sequence
317
- to generate:
318
-
319
- a) a versatile set of questions
320
- b) an accurate set of matching answers
321
-
322
- according to a given piece of text content.
323
-
324
- The idea is simple:
325
-
326
- 1. Add your content
327
- 2. Select the amount of questions you want to generate
328
- 2.2 (optional) Select the amount of answers you want to generate per given question
329
- 3. Press generate
330
- 4. ???
331
- 5. Profit
332
-
333
- If you're satisfied with the generated data set, you can export it as TSV
334
- to edit or import it into your favourite tool.
335
- """)
336
- with gr.Row(equal_height=True):
337
- with gr.Group("Content"):
338
- content = gr.Textbox(label='Content', lines=15, placeholder='Enter text here', max_lines=10_000)
339
- with gr.Group("Settings"):
340
- temperature_qg = gr.Slider(label='Temperature QG', value=0.2, minimum=0, maximum=1, step=0.01)
341
- temperature_qa = gr.Slider(label='Temperature QA', value=0.5, minimum=0, maximum=1, step=0.01)
342
- max_length = gr.Number(label='Max Length', value=85, minimum=1, step=1, maximum=512)
343
- num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
344
- num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
345
- seed = gr.Number(label="seed", value=42069)
346
- optimize_questions = gr.Checkbox(label="Optimize questions?", value=False)
347
-
348
- with gr.Row():
349
- gen_btn = gr.Button("Generate")
350
-
351
- @gr.render(
352
- inputs=[
353
- content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
354
- max_length, seed, optimize_questions
355
- ],
356
- triggers=[gen_btn.click]
357
- )
358
- def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length, seed, optimize_questions):
359
- qnas = gen(
360
- content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
361
- max_length, seed, optimize_questions
362
- )
363
- df = gr.Dataframe(
364
- value=[u.values() for u in qnas],
365
- headers=['Question', 'Answer'],
366
- col_count=2,
367
- wrap=True
 
 
 
 
 
 
368
  )
369
- pd_df = pd.DataFrame([u.values() for u in qnas], columns=['Question', 'Answer'])
370
-
371
- download = gr.DownloadButton(label='Download (without headers)', value=create_file_download(pd_df))
372
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
373
 
374
 
375
  demo.queue()
376
- demo.launch()
 
158
  4: [2],
159
  6: [2], # 6x3 == 4x2
160
  8: [2], # 8x4 == 6x3 == 4x2
161
+ 9: [3],
162
  10: [2], # 10x5 == 8x4 == 6x3 == 4x2
163
  }
164
 
 
250
  )
251
 
252
  if optimize_questions:
253
+ q_params = find_best_parameters(
254
+ list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length
255
+ )
256
 
257
  question = run_model(
258
  inputs,
 
311
  return 'qnas.tsv'
312
 
313
 
314
+ with gr.Blocks() as demo:
315
+ with gr.Tab(label='Description'):
316
+ with gr.Row(equal_height=True):
317
+ gr.Markdown(
318
+ """
319
+ # QA-Generator
320
+ A combination of fine-tuned flan-T5(-small) models chained into sequence
321
+ to generate:
322
+
323
+ a) a versatile set of questions
324
+ b) an accurate set of matching answers
325
+
326
+ according to a given piece of text content.
327
+ The idea is simple:
328
+
329
+ 1. Add your content
330
+ 2. Select the amount of questions you want to generate
331
+ 2.2 (optional) Select the amount of answers you want to generate per given question
332
+ 3. Press generate
333
+ 4. ???
334
+ 5. Profit
335
+ If you're satisfied with the generated data set, you can export it as TSV
336
+ to edit or import it into your favourite tool.
337
+ """)
338
+ with gr.Row(equal_height=True):
339
+ with gr.Accordion(label='Optimization', open=False):
340
+ gr.Markdown("""
341
+ For optimization of the question generation we apply the following combined score:
342
+
343
+ $$\\text{combined} = \\text{dist1} + \\text{dist2} - \\text{fluency} + \\text{contextual} - \\text{jsd}$$
344
+
345
+ Here's a brief explanation of each component:
346
+
347
+ 1. **dist1 and dist2**: These represent the diversity of the generated outputs. dist1 measures the ratio of unique unigrams to total unigrams, and dist2 measures the ratio of unique bigrams to total bigrams. <u>**Higher values indicate more diverse outputs.**</u>
348
+
349
+ 2. **fluency**: This is the perplexity of the generated outputs, which measures how well the outputs match the language model's expectations. <u>**Lower values indicate better fluency.**</u>
350
+
351
+ 3. **contextual**: This measures the similarity between the input and generated outputs using embedding similarity. <u>**Higher values indicate better contextual relevance.**</u>
352
+
353
+ 4. **jsd**: This is the Jensen-Shannon Divergence between the n-gram distributions of the generated outputs and the reference data. <u>**Lower values indicate greater similarity between distributions.**</u>
354
+ """, latex_delimiters=[{'display': False, 'left': '$$', 'right': '$$'}])
355
+ with gr.Tab(label='QA Generator'):
356
+ with gr.Row(equal_height=True):
357
+ with gr.Group("Content"):
358
+ content = gr.Textbox(label='Content', lines=15, placeholder='Enter text here', max_lines=10_000)
359
+ with gr.Group("Settings"):
360
+ temperature_qg = gr.Slider(label='Diversity Penalty QG', value=0.2, minimum=0, maximum=1, step=0.01)
361
+ temperature_qa = gr.Slider(label='Diversity Penalty QA', value=0.5, minimum=0, maximum=1, step=0.01)
362
+ max_length = gr.Number(label='Max Length', value=85, minimum=1, step=1, maximum=512)
363
+ num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
364
+ num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
365
+ seed = gr.Number(label="seed", value=42069)
366
+ optimize_questions = gr.Checkbox(label="Optimize questions?", value=False)
367
+
368
+ with gr.Row():
369
+ gen_btn = gr.Button("Generate")
370
+
371
+ @gr.render(
372
+ inputs=[
373
+ content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
374
+ max_length, seed, optimize_questions
375
+ ],
376
+ triggers=[gen_btn.click]
377
  )
378
+ def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length, seed, optimize_questions):
379
+ if not content.strip():
380
+ raise gr.Error('Please enter some content to generate questions and answers.')
381
+ qnas = gen(
382
+ content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
383
+ max_length, seed, optimize_questions
384
+ )
385
+ df = gr.Dataframe(
386
+ value=[u.values() for u in qnas],
387
+ headers=['Question', 'Answer'],
388
+ col_count=2,
389
+ wrap=True
390
+ )
391
+ pd_df = pd.DataFrame([u.values() for u in qnas], columns=['Question', 'Answer'])
392
+
393
+ download = gr.DownloadButton(label='Download (without headers)', value=create_file_download(pd_df))
394
+
395
+ content.change(lambda x: x.strip(), content)
396
 
397
 
398
  demo.queue()
399
+ demo.launch()