raj999 committed on
Commit
ef49f82
1 Parent(s): 5777b00

Create rag.py

Browse files
Files changed (1) hide show
  1. rag.py +84 -0
rag.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_community.document_loaders import Docx2txtLoader
2
+ from langchain_community.document_transformers import DoctranTextTranslator
3
+ from langchain_core.documents import Document
4
+ from dotenv import load_dotenv
5
+ from langchain_community.retrievers import BM25Retriever
6
+ import openai
7
+
8
+ # Set your OpenAI API key
9
+ # openai.api_key = "<redacted>"  # never commit a literal key; load it from an environment variable
10
+
11
+ from openai import OpenAI
12
+ from os import getenv
13
+ from langchain_core.documents import Document
14
+ from langchain_text_splitters import CharacterTextSplitter
15
+
16
def setup_retriever(
    knowledge_file="E:/Backup_K20pro/Download/treesat_benchmark/Canopy species list and uses (PT)_pt_en.docx",
    k=1,
):
    """Build a BM25 retriever over the species knowledge document.

    The .docx file is loaded with ``Docx2txtLoader``, split with a
    ``CharacterTextSplitter`` (literal "-" separator, target chunk size of
    600 as measured by ``len``, no overlap) and indexed with
    ``BM25Retriever``.

    Args:
        knowledge_file: Path of the .docx knowledge file. Defaults to the
            location that used to be hard-coded here, so existing
            zero-argument calls behave exactly as before.
        k: Value assigned to the retriever's ``k`` attribute (number of
            chunks returned per query). Defaults to 1, the original value.

    Returns:
        The configured ``BM25Retriever`` instance.
    """
    documents = Docx2txtLoader(knowledge_file).load()
    splitter = CharacterTextSplitter(
        separator="-",
        chunk_size=600,
        chunk_overlap=0,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = splitter.split_documents(documents)
    retriever = BM25Retriever.from_documents(chunks)
    retriever.k = k
    return retriever
32
+
33
# Module-level demo lookup: these statements run at import time, so importing
# this file loads the knowledge document and performs one retrieval.
retriever = setup_retriever()
# Hard-coded species used for the lookup below.
species = 'Byrsonima'
# The query is prefixed with "Scientific name:"; presumably this mirrors a
# field label in the knowledge document -- confirm against the .docx.
# species_info is later passed to generate_image() in the __main__ guard.
species_info = retriever.invoke(f"Scientific name:{species}")
print(species_info)
37
+ # qa_translator = DoctranTextTranslator(language="english")
38
+ # translated_document = qa_translator.transform_documents(data)
39
+ # print(translated_document)
40
+
41
+
42
# Hard-coded sample species record; not referenced anywhere else in the
# visible code.
info = 'Scientific name:Licaniasp.\n\nFamily:Chrysobalanaceae\n\nPopular name:They are generally known as caripé or macucu, among other generic names.\n\nHabitat:LicaniaIt is a large genus, with dozens of species distributed in all Amazon habitats, species that are difficult to identify without fertile material.\n\nUses:Some species can be edible, others known as caripé had (have) their bark roasted (presence of silica), macerated and together with clay used in the preparation of ceramics by indigenous populations.'
43
def setup_client():
    """Create an OpenAI-compatible client that talks to the OpenRouter API.

    The API key is read from the OPENROUTER_API_KEY environment variable.

    Returns:
        An ``OpenAI`` client whose base URL is the OpenRouter v1 endpoint.

    Raises:
        RuntimeError: If OPENROUTER_API_KEY is unset or empty.
    """
    api_key = getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Fail early with a clear message. Passing api_key=None would let the
        # OpenAI client fall back to OPENAI_API_KEY (per the openai-python
        # client documentation) and send that credential to OpenRouter.
        raise RuntimeError(
            "OPENROUTER_API_KEY environment variable is not set"
        )
    return OpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=api_key,
    )
51
+
52
# Shared module-level client, created at import time; the __main__ guard
# passes it to generate_image().
client = setup_client()
53
+
54
def generate_image(species_info, client, model="openai/gpt-3.5-turbo"):
    """Ask a chat model for a unicode "image" describing a species' uses.

    Despite the name, no image file is produced: the function sends one user
    message through the chat-completions API and returns the text content of
    the first choice.

    Args:
        species_info: Species description interpolated into the prompt via
            an f-string (any object with a useful string form works).
        client: Object exposing ``chat.completions.create(...)``, such as the
            client returned by ``setup_client()``.
        model: Model identifier forwarded to the API. Defaults to the value
            that used to be hard-coded here.

    Returns:
        ``completion.choices[0].message.content`` from the API response.
    """
    prompt = (
        "Using the trees species information provided below, "
        "Using the information in the 'Uses:' section. "
        "Generate 1 useful and informative unicode image to be used "
        "to be placed on a drone panoramic image. "
        f"Tree species info:{species_info}"
    )
    completion = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return completion.choices[0].message.content
66
+
67
+
68
if __name__ == "__main__":
    # Script entry point: send the retrieved species text to the chat model
    # and print the reply.
    print(generate_image(species_info, client))
71
+ # # Define the prompt and parameters for the request
72
+ # prompt = "Once upon a time"
73
+ # response = openai.Completion.create(
74
+ # engine="gpt-3.5-turbo", # Use the GPT-3.5 model
75
+ # prompt=prompt,
76
+ # max_tokens=50, # Adjust the number of tokens based on your requirement
77
+ # n=1,
78
+ # stop=None,
79
+ # temperature=0.7,
80
+ # )
81
+
82
+ # # Print the generated text
83
+ # generated_text = response.choices[0].text.strip()
84
+ # print(generated_text)