import json
import re

import gradio as gr
from pptx import Presentation
from transformers import pipeline

# Pipelines are loaded lazily on first use so the app starts quickly
# and the model weights are only downloaded once.
classifier = None
summarizer = None


def load_models():
    """Lazily load the classification and summarization pipelines."""
    global classifier, summarizer
    if classifier is None:
        classifier = pipeline(
            "text-classification",
            model="Ahmed235/roberta_classification",
            tokenizer="Ahmed235/roberta_classification",
        )
    if summarizer is None:
        summarizer = pipeline("summarization", model="Falconsai/text_summarization")


def extract_text_from_pptx(file_path):
    """Collect the text of every shape on every slide of a .pptx file."""
    try:
        presentation = Presentation(file_path)
        text = []
        for slide in presentation.slides:
            for shape in slide.shapes:
                if hasattr(shape, "text"):
                    text.append(shape.text)
        return "\n".join(text)
    except Exception as e:
        print(f"Error extracting text from PowerPoint: {e}")
        return ""


def limit_text_length(text, max_length=512):
    """Truncate the text to a rough character budget for the classifier input."""
    return text[:max_length]


def predict_pptx_content(file_path):
    """Classify and summarize the text of an uploaded PowerPoint file."""
    try:
        load_models()  # Ensure both pipelines are available.

        extracted_text = extract_text_from_pptx(file_path)
        cleaned_text = re.sub(r"\s+", " ", extracted_text).strip()
        if not cleaned_text:
            return json.dumps({"error": "No text could be extracted from the file."}, indent=3)

        # Classify a truncated slice of the deck text.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]["label"]
        predicted_probability = result[0]["score"]

        # Summarize the full cleaned text; truncation guards against inputs
        # longer than the model's maximum sequence length.
        summary = summarizer(
            cleaned_text,
            max_length=1000,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]

        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary,
        }
        return json.dumps(output, indent=3)
    except Exception as e:
        print(f"Error predicting content from PowerPoint: {e}")
        # Return a JSON string here as well so the output component always receives text.
        return json.dumps({"error": str(e)}, indent=3)


# Gradio interface
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=gr.Textbox(label="Output"),
    live=False,
    title="HackTalk Analyzer",
)

# Deploy the Gradio interface
iface.launch(share=True)