bisoye committed on
Commit
59bc9a2
1 Parent(s): 1e0fb56

Update summarizer.py

Browse files
Files changed (1) hide show
  1. summarizer.py +44 -18
summarizer.py CHANGED
@@ -4,27 +4,53 @@ from langchain_cohere.llms import Cohere
4
  from langchain.chains.summarize import load_summarize_chain
5
  from pathlib import Path
6
 
 
7
  def summarize_files(method, files):
8
  # Initialize the LLM
9
  llm = Cohere(temperature=0)
10
- summaries = []
11
- # Load and read each file
12
- for file in files:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- ext = Path(file).suffix.lower()
15
- if ext == '.pdf':
16
- loader = PyPDFLoader(file)
17
- elif ext == '.docx':
18
- loader = Docx2txtLoader(file)
19
- elif ext == '.pptx':
20
- loader = UnstructuredPowerPointLoader(file)
21
- else:
22
- raise ValueError(f"Unsupported file extension: {ext}")
23
 
24
- docs = loader.load_and_split()
25
- # Initialize a summarization chain with the specified method
26
- summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
27
- summary = summarization_chain.run(docs)
28
- summaries.append(summary)
29
 
30
- return summaries
 
4
  from langchain.chains.summarize import load_summarize_chain
5
  from pathlib import Path
6
 
7
+
8
  def summarize_files(method, files):
9
  # Initialize the LLM
10
  llm = Cohere(temperature=0)
11
+
12
+ ext = Path(file).suffix.lower()
13
+ if ext == '.pdf':
14
+ loader = PyPDFLoader(file)
15
+ elif ext == '.docx':
16
+ loader = Docx2txtLoader(file)
17
+ elif ext == '.pptx':
18
+ loader = UnstructuredPowerPointLoader(file)
19
+ else:
20
+ raise ValueError(f"Unsupported file extension: {ext}")
21
+
22
+ docs = loader.load_and_split()
23
+ # Initialize a summarization chain with the specified method
24
+ summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
25
+ summary = summarization_chain.run(docs)
26
+
27
+ return summary
28
+
29
+
30
+
31
+
32
+
33
+ # def summarize_files(method, files):
34
+ # # Initialize the LLM
35
+ # llm = Cohere(temperature=0)
36
+ # summaries = []
37
+ # # Load and read each file
38
+ # for file in files:
39
 
40
+ # ext = Path(file).suffix.lower()
41
+ # if ext == '.pdf':
42
+ # loader = PyPDFLoader(file)
43
+ # elif ext == '.docx':
44
+ # loader = Docx2txtLoader(file)
45
+ # elif ext == '.pptx':
46
+ # loader = UnstructuredPowerPointLoader(file)
47
+ # else:
48
+ # raise ValueError(f"Unsupported file extension: {ext}")
49
 
50
+ # docs = loader.load_and_split()
51
+ # # Initialize a summarization chain with the specified method
52
+ # summarization_chain = load_summarize_chain(llm=llm, chain_type=method)
53
+ # summary = summarization_chain.run(docs)
54
+ # summaries.append(summary)
55
 
56
+ # return summaries