Ahmed235 committed on
Commit
8cb1867
1 Parent(s): b0f6ae0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -31
app.py CHANGED
@@ -1,17 +1,8 @@
 
1
  from pptx import Presentation
2
  import re
3
- import gradio as gr
4
- from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
- import torch
6
- import torch.nn.functional as F
7
  from transformers import pipeline
8
 
9
- # Load the pre-trained model and tokenizer
10
- tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification")
11
- model = AutoModelForSequenceClassification.from_pretrained("Ahmed235/roberta_classification")
12
- device = torch.device("cpu")
13
- model = model.to(device) # Move the model to the CPU
14
-
15
  # Create a summarization pipeline
16
  summarizer = pipeline("summarization", model="Falconsai/text_summarization")
17
 
@@ -26,30 +17,13 @@ def extract_text_from_pptx(file_path):
26
 
27
  def predict_pptx_content(file_path):
28
  try:
29
- str(extracted_text) = extract_text_from_pptx(file_path)
30
- str(cleaned_text) = re.sub(r'\s+', ' ', extracted_text)
31
-
32
- # Tokenize and encode the cleaned text
33
- input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
34
- input_encoding = {key: val.to(device) for key, val in input_encoding.items()} # Move input tensor to CPU
35
-
36
- # Perform inference
37
- with torch.no_grad():
38
- outputs = model(**input_encoding)
39
- logits = outputs.logits
40
-
41
- probabilities = F.softmax(logits, dim=1)
42
-
43
- predicted_label_id = torch.argmax(logits, dim=1).item()
44
- predicted_label = model.config.id2label[predicted_label_id]
45
- predicted_probability = probabilities[0][predicted_label_id].item()
46
 
47
  # Summarize the cleaned text
48
  summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']
49
 
50
  prediction = {
51
- "Predicted Label": predicted_label,
52
- "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
53
  "Summary": summary
54
  }
55
 
@@ -64,10 +38,10 @@ def predict_pptx_content(file_path):
64
  iface = gr.Interface(
65
  fn=predict_pptx_content,
66
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
67
- outputs=["text", "text", "text"], # Predicted Label, Evaluation, Summary
68
  live=False, # Change to True for one-time analysis
69
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
70
  )
71
 
72
  # Deploy the Gradio interface
73
- iface.launch(share=True)
 
1
+ import gradio as gr
2
  from pptx import Presentation
3
  import re
 
 
 
 
4
  from transformers import pipeline
5
 
 
 
 
 
 
 
6
  # Create a summarization pipeline
7
  summarizer = pipeline("summarization", model="Falconsai/text_summarization")
8
 
 
17
 
18
  def predict_pptx_content(file_path):
19
  try:
20
+ extracted_text = extract_text_from_pptx(file_path)
21
+ cleaned_text = re.sub(r'\s+', ' ', extracted_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Summarize the cleaned text
24
  summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']
25
 
26
  prediction = {
 
 
27
  "Summary": summary
28
  }
29
 
 
38
  iface = gr.Interface(
39
  fn=predict_pptx_content,
40
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
41
+ outputs="text", # Only output the summary
42
  live=False, # Change to True for one-time analysis
43
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
44
  )
45
 
46
  # Deploy the Gradio interface
47
+ iface.launch(share=True)