import streamlit as st
from helper import (
    load_dataset,
    search,
    get_file_paths,
    get_cordinates,
    get_images_from_s3_to_display,
    get_images_with_bounding_boxes_from_s3,
    load_dataset_with_limit
)
import os
import time
from memory_profiler import memory_usage  # required by the memory profiling below

# Load AWS credentials from environment variables
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Predefined list of datasets
datasets = ["WayveScenes", "MajorTom-Germany"]
description = {
    "WayveScenes": "A large-scale dataset featuring diverse urban driving scenes, captured from autonomous vehicles to advance AI perception and navigation in complex environments.",
    "MajorTom-Germany": "A geospatial dataset containing satellite imagery from across Germany, designed for tasks like land-use classification, environmental monitoring, and earth observation analytics."
}
# Number of available subsets per dataset: [main splits, small-object splits]
selection = {
    "WayveScenes": [1, 8],
    "MajorTom-Germany": [1, 1]
}
folder_path_dict = {
    "WayveScenes": "WayveScenes/",
    "MajorTom-Germany": "MajorTom-Europe/"
}

# AWS S3 bucket name
bucket_name = "datasets-quasara-io"


# Streamlit App
def main():
    # Initialize session state variables if not already initialized
    if 'search_in_small_objects' not in st.session_state:
        st.session_state.search_in_small_objects = False
    if 'dataset_number' not in st.session_state:
        st.session_state.dataset_number = 1
    if 'df' not in st.session_state:
        st.session_state.df = None

    st.title("Semantic Search and Image Display")

    # Select dataset from dropdown
    dataset_name = st.selectbox("Select Dataset", datasets)
    folder_path = folder_path_dict[dataset_name]
    st.caption(description[dataset_name])

    if st.checkbox("Enable Small Object Search", value=st.session_state.search_in_small_objects):
        st.session_state.search_in_small_objects = True
        st.text("Small Object Search Enabled")
        st.session_state.dataset_number = st.selectbox(
            "Select Subset of Data",
            list(range(1, selection[dataset_name][1] + 1))
        )
        st.text(f"You have selected Split Dataset {st.session_state.dataset_number}")
    else:
        st.session_state.search_in_small_objects = False
        st.text("Small Object Search Disabled")
        st.session_state.dataset_number = st.selectbox(
            "Select Subset of Data",
            list(range(1, selection[dataset_name][0] + 1))
        )
        st.text(f"You have selected Main Dataset {st.session_state.dataset_number}")

    dataset_limit = st.slider("Size of Dataset to be searched from", min_value=1000, max_value=30000, value=10000)
    st.text("The smaller the dataset, the faster the search will be.")

    # Load dataset with limit only if not already loaded
    if st.button("Load Dataset"):
        try:
            loading_dataset_text = st.empty()
            loading_dataset_text.text("Loading Dataset...")
            loading_dataset_bar = st.progress(0)

            # Memory profiling: memory_usage runs load_dataset_with_limit once
            # just to sample peak memory; the dataset is loaded again below.
            mem_usage = memory_usage((
                load_dataset_with_limit,
                (dataset_name, st.session_state.dataset_number, st.session_state.search_in_small_objects),
                {"limit": dataset_limit}
            ))
            st.write(f"Memory used for loading the dataset: {mem_usage[-1]:.2f} MB")

            # Simulate dataset loading progress
            for i in range(0, 100, 25):
                time.sleep(0.2)  # Simulate work being done
                loading_dataset_bar.progress(i + 25)

            # Load the dataset and keep it in session state for later searches
            df, total_rows = load_dataset_with_limit(
                dataset_name,
                st.session_state.dataset_number,
                st.session_state.search_in_small_objects,
                limit=dataset_limit
            )
            st.session_state.df = df

            loading_dataset_bar.progress(100)
            loading_dataset_text.text("Dataset loaded successfully!")
            st.success(f"Dataset loaded successfully with {len(df)} rows.")
        except Exception as e:
            st.error(f"Failed to load dataset: {e}")

    # Input search query
    query = st.text_input("Enter your search query")

    # Number of results to display
    limit = st.number_input("Number of results to display", min_value=1, max_value=10, value=10)

    # Search button
    if st.button("Search"):
        # Validate input: require a query and a loaded dataset before searching
        if not query:
            st.warning("Please enter a search query.")
        elif st.session_state.df is None:
            st.warning("Please load a dataset before searching.")
        else:
            try:
                # Progress bar for search
                search_loading_text = st.empty()
                search_loading_text.text("Searching...")
                search_progress_bar = st.progress(0)

                # Perform search on the loaded dataset from session state
                df = st.session_state.df
                if st.session_state.search_in_small_objects:
                    # Small-object search also retrieves bounding-box coordinates
                    results = search(query, df, limit)
                    top_k_paths = get_file_paths(df, results)
                    top_k_cordinates = get_cordinates(df, results)
                else:
                    # Normal search
                    results = search(query, df, limit)
                    top_k_paths = get_file_paths(df, results)

                # Complete the search progress
                search_progress_bar.progress(100)
                search_loading_text.text("Search completed!")

                # Load images, with bounding boxes if applicable
                if st.session_state.search_in_small_objects and top_k_paths and top_k_cordinates:
                    get_images_with_bounding_boxes_from_s3(
                        bucket_name, top_k_paths, top_k_cordinates,
                        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path
                    )
                elif not st.session_state.search_in_small_objects and top_k_paths:
                    st.write(f"Displaying top {len(top_k_paths)} results for query '{query}':")
                    get_images_from_s3_to_display(
                        bucket_name, top_k_paths,
                        AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, folder_path
                    )
                else:
                    st.write("No results found.")
            except Exception as e:
                st.error(f"Search failed: {e}")


if __name__ == "__main__":
    main()
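# Usage sketch, under two assumptions not confirmed by this file: the script is
# saved as app.py, and streamlit plus memory_profiler (and helper.py's own
# dependencies) are installed. The AWS credentials must be exported with the
# exact variable names read above:
#
#   export AWS_ACCESS_KEY_ID=<your-key-id>
#   export AWS_SECRET_ACCESS_KEY=<your-secret-key>
#   streamlit run app.py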