File size: 2,741 Bytes
98c0f54
18332e8
d6ff263
bbeaa3a
e993aed
09ca2da
5bc6c98
18332e8
e025c42
09ca2da
5bc6c98
 
 
 
 
 
 
 
 
 
 
 
 
 
2bb61b8
5bc6c98
 
 
 
 
 
 
 
 
 
 
2bb61b8
5bc6c98
d6ff263
 
 
5bc6c98
12b0ed7
95d05cb
5bc6c98
8cb1867
b246175
d6ff263
 
18332e8
 
1b6c7fd
9ad6938
2e63d67
 
 
ae2295f
5bc6c98
2e63d67
5bc6c98
95d05cb
12b0ed7
5bc6c98
12b0ed7
 
e36289c
d8e3fd6
d6ff263
bcb2ab6
12b0ed7
 
 
124a463
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re
import json

# Load the classification and summarization pipelines eagerly at import time.
classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Cache flags consulted by load_models(). They start True because the
# pipelines above are already constructed; the original False values made
# load_models() rebuild both models a second time on first use.
classification_model_loaded = True
summarization_model_loaded = True

def load_models():
    """Build the classification/summarization pipelines if not yet cached.

    Rebinds the module-level ``classifier``/``summarizer`` globals and flips
    the corresponding ``*_model_loaded`` flags so each pipeline is only
    constructed once per process.
    """
    global classifier, summarizer, classification_model_loaded, summarization_model_loaded
    if not classification_model_loaded:
        classifier = pipeline(
            "text-classification",
            model="Ahmed235/roberta_classification",
            tokenizer="Ahmed235/roberta_classification",
        )
        classification_model_loaded = True
    if not summarization_model_loaded:
        summarizer = pipeline(
            "summarization",
            model="Falconsai/text_summarization",
        )
        summarization_model_loaded = True

# Extract text from PowerPoint
def extract_text_from_pptx(file_path):
    """Return the text of every text-bearing shape on every slide, newline-joined.

    Best-effort: any failure (unreadable file, bad format, ...) is reported
    to stdout and an empty string is returned instead of raising.
    """
    try:
        collected = [
            shape.text
            for slide in Presentation(file_path).slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        return "\n".join(collected)
    except Exception as e:
        print(f"Error extracting text from PowerPoint: {e}")
        return ""

# Limit text length
def limit_text_length(text, max_length=512):
    """Truncate *text* to at most *max_length* characters."""
    if len(text) <= max_length:
        return text
    return text[:max_length]

# Predict content from PowerPoint
def predict_pptx_content(file_path):
    """Classify and summarize a .pptx file, returning a JSON string.

    The JSON object carries "predicted_label", "evaluation" (the classifier's
    confidence score) and "summary"; on failure it carries a single "error"
    key. Both paths now return a JSON string — the original returned a raw
    dict on error, an inconsistent type for callers of this function.
    """
    try:
        load_models()  # Load models if not loaded already
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse whitespace runs so the models see a single clean line.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)
        # The classifier only consumes a bounded window of the text.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        summary = summarizer(cleaned_text, max_length=1000, min_length=30, do_sample=False)[0]['summary_text']
        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }
        return json.dumps(output, indent=3)
    except Exception as e:
        print(f"Error predicting content from PowerPoint: {e}")
        # Keep the return type consistent with the success path.
        return json.dumps({"error": str(e)}, indent=3)

# Gradio interface
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    # Use label= explicitly: the first positional Textbox argument is the
    # component's initial *value*, so the original gr.Textbox("output")
    # pre-filled the box with the literal text "output" instead of labelling it.
    outputs=gr.Textbox(label="output"),
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)

# Deploy the Gradio interface
iface.launch(share=True)