inie2003 committed on
Commit
b0bacdc
1 Parent(s): 022e3b7

splitting Major TOM Europe into smaller countries

Browse files
Files changed (1) hide show
  1. app.py +48 -37
app.py CHANGED
@@ -1,62 +1,59 @@
1
  import streamlit as st
2
- import logging
3
- import os
4
- import time
5
- import psutil
6
- from helper import (
7
  load_dataset, search, get_file_paths,
8
  get_cordinates, get_images_from_s3_to_display,
9
  get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
10
  )
11
-
12
- # Configure logging
13
- logging.basicConfig(level=logging.INFO)
 
14
 
15
  # Load environment variables
16
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
17
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
18
 
19
  # Predefined list of datasets
20
- datasets = ["WayveScenes", "MajorTom-Europe"]
21
  description = {
22
- "StopSign_test": "A test dataset for me",
23
- "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes.",
24
- "MajorTom-Europe": "A geospatial dataset containing satellite imagery from across Europe."
25
  }
26
  selection = {
27
  'WayveScenes': [1, 8],
28
- "MajorTom-Europe": [1, 18]
29
  }
30
-
 
 
 
31
  # AWS S3 bucket name
32
  bucket_name = "datasets-quasara-io"
33
 
34
- # Function to log CPU and memory usage
35
def log_resource_usage(stage):
    """Log the CPU and memory usage percentages reported by psutil.

    Args:
        stage: Label prepended to the log message to identify where in the
            app the measurement was taken.
    """
    # interval=1 makes psutil sample CPU usage over a one-second window.
    cpu_usage = psutil.cpu_percent(interval=1)
    memory_percent = psutil.virtual_memory().percent
    message = f"{stage} - CPU Usage: {cpu_usage}%, Memory Usage: {memory_percent}%"
    logging.info(message)
39
 
40
  # Streamlit App
41
  def main():
42
  # Initialize session state variables if not already initialized
43
  if 'search_in_small_objects' not in st.session_state:
44
  st.session_state.search_in_small_objects = False
 
45
  if 'dataset_number' not in st.session_state:
46
  st.session_state.dataset_number = 1
 
47
  if 'df' not in st.session_state:
48
  st.session_state.df = None
49
 
50
  st.title("Semantic Search and Image Display")
51
- log_resource_usage("Initialization")
52
 
53
  # Select dataset from dropdown
54
  dataset_name = st.selectbox("Select Dataset", datasets)
55
 
56
- if dataset_name == 'StopSign_test':
57
- folder_path = ""
58
- else:
59
- folder_path = f'{dataset_name}/'
60
 
61
  st.caption(description[dataset_name])
62
 
@@ -64,13 +61,15 @@ def main():
64
  st.session_state.search_in_small_objects = True
65
  st.text("Small Object Search Enabled")
66
  st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
 
67
  else:
68
  st.session_state.search_in_small_objects = False
69
  st.text("Small Object Search Disabled")
70
  st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))
 
71
 
72
- dataset_limit = st.slider("Size of Dataset to be searched from", min_value=1000, max_value=20000, value=10000)
73
- st.text(f'The smaller the dataset, the faster the search will work.')
74
 
75
  # Load dataset with limit only if not already loaded
76
  if st.button("Load Dataset"):
@@ -78,25 +77,32 @@ def main():
78
  loading_dataset_text = st.empty()
79
  loading_dataset_text.text("Loading Dataset...")
80
  loading_dataset_bar = st.progress(0)
81
-
 
 
 
 
82
  # Simulate dataset loading progress
83
  for i in range(0, 100, 25):
84
- time.sleep(0.2)
85
  loading_dataset_bar.progress(i + 25)
86
 
87
- log_resource_usage("Before Loading Dataset")
88
  df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=dataset_limit)
 
 
89
  st.session_state.df = df
90
-
91
  loading_dataset_bar.progress(100)
92
  loading_dataset_text.text("Dataset loaded successfully!")
93
  st.success(f"Dataset loaded successfully with {len(df)} rows.")
94
- log_resource_usage("After Loading Dataset")
95
-
 
 
96
  except Exception as e:
97
- logging.error(f"Failed to load dataset: {e}")
98
  st.error(f"Failed to load dataset: {e}")
99
-
 
100
  # Input search query
101
  query = st.text_input("Enter your search query")
102
 
@@ -110,23 +116,25 @@ def main():
110
  st.warning("Please enter a search query.")
111
  else:
112
  try:
 
113
  search_loading_text = st.empty()
114
  search_loading_text.text("Searching...")
115
  search_progress_bar = st.progress(0)
116
 
117
- log_resource_usage("Before Search")
118
  df = st.session_state.df
119
  if st.session_state.search_in_small_objects:
120
  results = search(query, df, limit)
121
  top_k_paths = get_file_paths(df, results)
122
  top_k_cordinates = get_cordinates(df, results)
123
  else:
 
124
  results = search(query, df, limit)
125
  top_k_paths = get_file_paths(df, results)
126
 
 
127
  search_progress_bar.progress(100)
128
  search_loading_text.text("Search completed!")
129
- log_resource_usage("After Search")
130
 
131
  # Load Images with Bounding Boxes if applicable
132
  if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
@@ -134,11 +142,14 @@ def main():
134
  elif not st.session_state.search_in_small_objects and top_k_paths:
135
  st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
136
  get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
 
137
  else:
138
  st.write("No results found.")
139
 
 
 
 
140
  except Exception as e:
141
- logging.error(f"Search failed: {e}")
142
  st.error(f"Search failed: {e}")
143
 
144
  if __name__ == "__main__":
 
1
  import streamlit as st
2
+ from helper3 import (
 
 
 
 
3
  load_dataset, search, get_file_paths,
4
  get_cordinates, get_images_from_s3_to_display,
5
  get_images_with_bounding_boxes_from_s3, load_dataset_with_limit
6
  )
7
+ import os
8
+ import time
9
+ import psutil
10
+ from memory_profiler import memory_usage
11
 
12
  # Load environment variables
13
  AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
14
  AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
15
 
16
  # Predefined list of datasets
17
+ datasets = ["WayveScenes", "MajorTom-Germany"]
18
  description = {
19
+ "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
20
+ "MajorTom-Germany": "A geospatial dataset containing satellite imagery from across Germany, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."
 
21
  }
22
  selection = {
23
  'WayveScenes': [1, 8],
24
+ "MajorTom-Germany": [1, 1]
25
  }
26
+ folder_path_dict = {
27
+ "WayveScenes" : 'WayveScenes/',
28
+ "MajorTom-Germany": "MajorTom-Europe/"
29
+ }
30
  # AWS S3 bucket name
31
  bucket_name = "datasets-quasara-io"
32
 
33
+ # Function to display CPU and memory usage
34
def display_usage():
    """Write the current process's CPU and memory usage to the Streamlit page.

    Emits two ``st.write`` lines: the CPU utilisation of this process as a
    percentage, and its resident set size divided by 1024**2 (labelled MB).
    """
    process = psutil.Process(os.getpid())
    # A freshly created Process has no earlier sample to compare against, so
    # cpu_percent() without an interval returns a meaningless 0.0 on its
    # first call. Sampling over a short blocking interval gives a real value.
    cpu_usage = process.cpu_percent(interval=0.1)
    st.write(f"CPU usage: {cpu_usage}%")
    st.write(f"Memory usage: {process.memory_info().rss / (1024 ** 2)} MB")
38
 
39
  # Streamlit App
40
  def main():
41
  # Initialize session state variables if not already initialized
42
  if 'search_in_small_objects' not in st.session_state:
43
  st.session_state.search_in_small_objects = False
44
+
45
  if 'dataset_number' not in st.session_state:
46
  st.session_state.dataset_number = 1
47
+
48
  if 'df' not in st.session_state:
49
  st.session_state.df = None
50
 
51
  st.title("Semantic Search and Image Display")
 
52
 
53
  # Select dataset from dropdown
54
  dataset_name = st.selectbox("Select Dataset", datasets)
55
 
56
+ folder_path = folder_path_dict[dataset_name]
 
 
 
57
 
58
  st.caption(description[dataset_name])
59
 
 
61
  st.session_state.search_in_small_objects = True
62
  st.text("Small Object Search Enabled")
63
  st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][1] + 1)))
64
+ st.text(f"You have selected Split Dataset {st.session_state.dataset_number}")
65
  else:
66
  st.session_state.search_in_small_objects = False
67
  st.text("Small Object Search Disabled")
68
  st.session_state.dataset_number = st.selectbox("Select Subset of Data", list(range(1, selection[dataset_name][0] + 1)))
69
+ st.text(f"You have selected Main Dataset {st.session_state.dataset_number}")
70
 
71
+ dataset_limit = st.slider("Size of Dataset to be searched from", min_value=1000, max_value=30000, value=10000)
72
+ st.text(f'The smaller the dataset the faster the search will work.')
73
 
74
  # Load dataset with limit only if not already loaded
75
  if st.button("Load Dataset"):
 
77
  loading_dataset_text = st.empty()
78
  loading_dataset_text.text("Loading Dataset...")
79
  loading_dataset_bar = st.progress(0)
80
+
81
+ # Memory profiling
82
+ mem_usage = memory_usage((load_dataset_with_limit, (dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects), {"limit": dataset_limit}))
83
+ st.write(f"Memory used for loading the dataset: {mem_usage[-1]:.2f} MB")
84
+
85
  # Simulate dataset loading progress
86
  for i in range(0, 100, 25):
87
+ time.sleep(0.2) # Simulate work being done
88
  loading_dataset_bar.progress(i + 25)
89
 
90
+ # Load dataset and monitor CPU and memory
91
  df, total_rows = load_dataset_with_limit(dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects, limit=dataset_limit)
92
+
93
+ # Store loaded dataset in session state
94
  st.session_state.df = df
 
95
  loading_dataset_bar.progress(100)
96
  loading_dataset_text.text("Dataset loaded successfully!")
97
  st.success(f"Dataset loaded successfully with {len(df)} rows.")
98
+
99
+ # Display CPU and memory usage
100
+ display_usage()
101
+
102
  except Exception as e:
 
103
  st.error(f"Failed to load dataset: {e}")
104
+
105
+
106
  # Input search query
107
  query = st.text_input("Enter your search query")
108
 
 
116
  st.warning("Please enter a search query.")
117
  else:
118
  try:
119
+ # Progress bar for search
120
  search_loading_text = st.empty()
121
  search_loading_text.text("Searching...")
122
  search_progress_bar = st.progress(0)
123
 
124
+ # Perform search on the loaded dataset from session state
125
  df = st.session_state.df
126
  if st.session_state.search_in_small_objects:
127
  results = search(query, df, limit)
128
  top_k_paths = get_file_paths(df, results)
129
  top_k_cordinates = get_cordinates(df, results)
130
  else:
131
+ # Normal Search
132
  results = search(query, df, limit)
133
  top_k_paths = get_file_paths(df, results)
134
 
135
+ # Complete the search progress
136
  search_progress_bar.progress(100)
137
  search_loading_text.text("Search completed!")
 
138
 
139
  # Load Images with Bounding Boxes if applicable
140
  if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
 
142
  elif not st.session_state.search_in_small_objects and top_k_paths:
143
  st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
144
  get_images_from_s3_to_display(bucket_name, top_k_paths, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path)
145
+
146
  else:
147
  st.write("No results found.")
148
 
149
+ # Display CPU and memory usage
150
+ display_usage()
151
+
152
  except Exception as e:
 
153
  st.error(f"Search failed: {e}")
154
 
155
  if __name__ == "__main__":