File size: 2,741 Bytes
98c0f54 18332e8 d6ff263 bbeaa3a e993aed 09ca2da 5bc6c98 18332e8 e025c42 09ca2da 5bc6c98 2bb61b8 5bc6c98 2bb61b8 5bc6c98 d6ff263 5bc6c98 12b0ed7 95d05cb 5bc6c98 8cb1867 b246175 d6ff263 18332e8 1b6c7fd 9ad6938 2e63d67 ae2295f 5bc6c98 2e63d67 5bc6c98 95d05cb 12b0ed7 5bc6c98 12b0ed7 e36289c d8e3fd6 d6ff263 bcb2ab6 12b0ed7 124a463 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re
import json
# Model pipelines are loaded lazily by load_models() on first use.
# (Initializing them to None here avoids loading each model twice:
# once at import time and again inside load_models(), whose guard
# flags start False.)
classifier = None
summarizer = None
# Flags tracking whether each pipeline has been initialized.
classification_model_loaded = False
summarization_model_loaded = False
def load_models():
    """Initialize the classification and summarization pipelines on demand.

    Safe to call repeatedly: each pipeline is constructed only if its
    "loaded" flag is still False, so the expensive model load happens
    at most once per process.
    """
    global classifier, summarizer, classification_model_loaded, summarization_model_loaded
    if not classification_model_loaded:
        classifier = pipeline(
            "text-classification",
            model="Ahmed235/roberta_classification",
            tokenizer="Ahmed235/roberta_classification",
        )
        classification_model_loaded = True
    if not summarization_model_loaded:
        summarizer = pipeline(
            "summarization",
            model="Falconsai/text_summarization",
        )
        summarization_model_loaded = True
# Extract text from PowerPoint
def extract_text_from_pptx(file_path):
    """Return all text found in the .pptx at *file_path*, one shape per line.

    Slides are visited in order; shapes without a text attribute are
    skipped.  On any failure the error is printed and an empty string is
    returned (best-effort, never raises).
    """
    try:
        deck = Presentation(file_path)
        pieces = [
            shape.text
            for slide in deck.slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        return "\n".join(pieces)
    except Exception as e:
        print(f"Error extracting text from PowerPoint: {e}")
        return ""
# Limit text length
def limit_text_length(text, max_length=512):
    """Return at most the first *max_length* characters of *text*."""
    if len(text) <= max_length:
        return text
    return text[:max_length]
# Predict content from PowerPoint
def predict_pptx_content(file_path):
    """Classify and summarize the text of the .pptx at *file_path*.

    Returns a JSON-formatted string with keys "predicted_label",
    "evaluation" (classifier confidence score) and "summary".  On
    failure a JSON string with an "error" key is returned instead, so
    the return type is consistent for the Gradio Textbox output
    (the original returned a dict on error but a str on success).
    """
    try:
        load_models()  # Load models if not loaded already
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse whitespace runs so the model sees a single clean line.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)
        # The classifier has a limited input window; truncate for it only.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        # Summarize the full cleaned text (not the truncated copy).
        summary = summarizer(
            cleaned_text, max_length=1000, min_length=30, do_sample=False
        )[0]['summary_text']
        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary,
        }
        return json.dumps(output, indent=3)
    except Exception as e:
        print(f"Error predicting content from PowerPoint: {e}")
        # Return JSON text (not a dict) to match the success-path type.
        return json.dumps({"error": str(e)}, indent=3)
# Gradio interface
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    # Use label= here: Textbox's first positional argument is the initial
    # *value*, so gr.Textbox("output") pre-filled the box with the text
    # "output" instead of titling it.
    outputs=gr.Textbox(label="output"),
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)
# Deploy the Gradio interface; share=True also exposes a public tunnel URL.
iface.launch(share=True)
|