Spaces:

phyloforfun
/

VoucherVision

Running

App Files Community

phyloforfun committed on Nov 8, 2023

Commit

26c9c07

•

1 Parent(s): aaf86cc

file upload gallery

Browse files

Files changed (4) hide show

app.py +8 -15
vouchervision/component_detector/component_detector.py +6 -2
vouchervision/utils_VoucherVision.py +3 -1
vouchervision/vouchervision_main.py +11 -4

app.py CHANGED Viewed

@@ -618,7 +618,7 @@ def show_available_APIs():
     emoji_bad = ":x:"
     table = {
-        'Google OCR API': emoji_good if st.session_state['has_key_google_OCR'] else emoji_bad,
         'OpenAI API': emoji_good if st.session_state['has_key_openai'] else emoji_bad,
         'Palm API': emoji_good if st.session_state['has_key_palm2'] else emoji_bad,
         'OpenAI API (Azure)': emoji_good if st.session_state['has_key_azure'] else emoji_bad,
@@ -1252,6 +1252,7 @@ def content_header():
     with col_run_1:
         show_header_welcome()
         st.subheader('Run VoucherVision')
         if check_if_usable():
             if st.button("Start Processing", type='primary'):
@@ -1259,8 +1260,13 @@ def content_header():
                 write_config_file(st.session_state.config, st.session_state.dir_home, filename="VoucherVision.yaml")
                 path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
                 # Call the machine function.
-                last_JSON_response, total_cost, st.session_state['zip_filepath'] = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'))
                 if total_cost:
                     st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
@@ -1327,10 +1333,6 @@ def content_tab_settings():
     col_local_1, col_local_2 = st.columns([2,6])
-    # st.write("---")
-    # st.header('Modules')
-    # col_m1, col_m2 = st.columns(2)
     st.write("---")
     st.header('Cropped Components')
     col_cropped_1, col_cropped_2 = st.columns([4,4])
@@ -1346,15 +1348,6 @@ def content_tab_settings():
     ### LLM Version
     with col_project_2:
-        # LLM_VERSIONS_available =
-        # st.session_state.config['leafmachine']['project']['dir_images_local'] = st.session_state['dir_uploaded_images'] #st.text_input("Input images directory", st.session_state.config['leafmachine']['project'].get('dir_images_local', ''))
-        # # st.session_state.config['leafmachine']['project']['continue_run_from_partial_xlsx'] = st.text_input("Continue run from partially completed project XLSX", st.session_state.config['leafmachine']['project'].get('continue_run_from_partial_xlsx', ''), disabled=True)
-        # st.subheader('LLM Version')
-        # st.session_state.config['leafmachine']['LLM_version'] = st.selectbox("LLM version", LLM_VERSIONS,
-        #                                                                      index=LLM_VERSIONS.index(st.session_state.config['leafmachine'].get('LLM_version', 'Azure GPT 4')),
-        #                                                                      label_visibility='collapsed')
-        # st.markdown("""***Note:*** GPT-4 is significantly more expensive than GPT-3.5  """)
         # Determine the available versions based on the API keys present
         available_versions = []
         for api_name, versions in st.session_state['LLM_VERSIONS'].items():

     emoji_bad = ":x:"
     table = {
+        'Google Vision OCR API (required!)': emoji_good if st.session_state['has_key_google_OCR'] else emoji_bad,
         'OpenAI API': emoji_good if st.session_state['has_key_openai'] else emoji_bad,
         'Palm API': emoji_good if st.session_state['has_key_palm2'] else emoji_bad,
         'OpenAI API (Azure)': emoji_good if st.session_state['has_key_azure'] else emoji_bad,
     with col_run_1:
         show_header_welcome()
         st.subheader('Run VoucherVision')
+        N_STEPS = 10
         if check_if_usable():
             if st.button("Start Processing", type='primary'):
                 write_config_file(st.session_state.config, st.session_state.dir_home, filename="VoucherVision.yaml")
                 path_custom_prompts = os.path.join(st.session_state.dir_home,'custom_prompts',st.session_state.config['leafmachine']['project']['prompt_version'])
+                # Define number of overall steps
+                progress_report.set_n_overall(N_STEPS)
+                progress_report.update_overall(f"Starting VoucherVision...")
                 # Call the machine function.
+                last_JSON_response, total_cost, st.session_state['zip_filepath'] = voucher_vision(None, st.session_state.dir_home, path_custom_prompts, None, progress_report,path_api_cost=os.path.join(st.session_state.dir_home,'api_cost','api_cost.yaml'), is_real_run=True)
                 if total_cost:
                     st.success(f":money_with_wings: This run cost :heavy_dollar_sign:{total_cost:.4f}")
     col_local_1, col_local_2 = st.columns([2,6])
     st.write("---")
     st.header('Cropped Components')
     col_cropped_1, col_cropped_2 = st.columns([4,4])
     ### LLM Version
     with col_project_2:
         # Determine the available versions based on the API keys present
         available_versions = []
         for api_name, versions in st.session_state['LLM_VERSIONS'].items():

vouchervision/component_detector/component_detector.py CHANGED Viewed

@@ -112,14 +112,18 @@ def detect_plant_components(cfg, logger, dir_home, Project, Dirs):
     return Project
-def detect_archival_components(cfg, logger, dir_home, Project, Dirs):
     if not cfg['leafmachine']['use_RGB_label_images']:
         logger.name = 'Skipping LeafMachine2 Label Detection'
-        logger.info(f"Full image will be used instead of the label collage")
     else:
         t1_start = perf_counter()
         logger.name = 'Locating Archival Components'
         logger.info(f"Detecting archival components in {len(os.listdir(Project.dir_images))} images")
         try:

     return Project
+def detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run=False, progress_report=None):
     if not cfg['leafmachine']['use_RGB_label_images']:
         logger.name = 'Skipping LeafMachine2 Label Detection'
+        logger.info(f"Full image will be used instead of the label collage")
+        if is_real_run:
+            progress_report.update_overall(f"Skipping LeafMachine2 Label Detection")
     else:
         t1_start = perf_counter()
         logger.name = 'Locating Archival Components'
         logger.info(f"Detecting archival components in {len(os.listdir(Project.dir_images))} images")
+        if is_real_run:
+            progress_report.update_overall(f"Creating LeafMachine2 Label Collage")
         try:

vouchervision/utils_VoucherVision.py CHANGED Viewed

@@ -765,7 +765,9 @@ class VoucherVision():
             self.add_data_to_excel_from_response(self.path_transcription, response, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
         return response
-    def process_specimen_batch(self, progress_report):
         try:
             if self.has_key:
                 if self.model_name:

             self.add_data_to_excel_from_response(self.path_transcription, response, filename_without_extension, path_to_crop, txt_file_path, jpg_file_path_OCR_helper, nt_in, nt_out)
         return response
+    def process_specimen_batch(self, progress_report, is_real_run=False):
+        if is_real_run:
+            progress_report.update_overall(f"Transcribing Labels")
         try:
             if self.has_key:
                 if self.model_name:

vouchervision/vouchervision_main.py CHANGED Viewed

@@ -16,7 +16,7 @@ from fetch_data import fetch_data
 from utils_VoucherVision import VoucherVision, space_saver
-def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, path_api_cost=None, test_ind = None):
     # get_n_overall = progress_report.get_n_overall()
     # progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")
@@ -43,6 +43,8 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
     #         cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]
     # Dir structure
     print_main_start("Creating Directory Structure")
     Dirs = Dir_Structure(cfg)
@@ -50,6 +52,8 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
     logger = start_logging(Dirs, cfg)
     # Check to see if required ML files are ready to use
     ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
     assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"
@@ -62,15 +66,15 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
     # Detect Archival Components
     print_main_start("Locating Archival Components")
-    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs)
     # Save cropped detections
     crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)
     # Process labels
     Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs)
     n_images = len(Voucher_Vision.img_paths)
-    last_JSON_response, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report)
     if path_api_cost:
         cost_summary, data, total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images)
@@ -84,6 +88,9 @@ def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progr
     logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
     space_saver(cfg, Dirs, logger)
     for handler in logger.handlers[:]:
         handler.close()
         logger.removeHandler(handler)

 from utils_VoucherVision import VoucherVision, space_saver
+def voucher_vision(cfg_file_path, dir_home, path_custom_prompts, cfg_test, progress_report, path_api_cost=None, test_ind = None, is_real_run=False):
     # get_n_overall = progress_report.get_n_overall()
     # progress_report.update_overall(f"Working on {test_ind+1} of {get_n_overall}")
     #         cfg['leafmachine']['project']['run_name'] = run_name[dir_ind]
     # Dir structure
+    if is_real_run:
+        progress_report.update_overall(f"Creating Output Directory Structure")
     print_main_start("Creating Directory Structure")
     Dirs = Dir_Structure(cfg)
     logger = start_logging(Dirs, cfg)
     # Check to see if required ML files are ready to use
+    if is_real_run:
+        progress_report.update_overall(f"Fetching LeafMachine2 Files")
     ready_to_use = fetch_data(logger, dir_home, cfg_file_path)
     assert ready_to_use, "Required ML files are not ready to use!\nThe download may have failed,\nor\nthe directory structure of LM2 has been altered"
     # Detect Archival Components
     print_main_start("Locating Archival Components")
+    Project = detect_archival_components(cfg, logger, dir_home, Project, Dirs, is_real_run, progress_report)
     # Save cropped detections
     crop_detections_from_images_VV(cfg, logger, dir_home, Project, Dirs)
     # Process labels
     Voucher_Vision = VoucherVision(cfg, logger, dir_home, path_custom_prompts, Project, Dirs)
     n_images = len(Voucher_Vision.img_paths)
+    last_JSON_response, total_tokens_in, total_tokens_out = Voucher_Vision.process_specimen_batch(progress_report, is_real_run)
     if path_api_cost:
         cost_summary, data, total_cost = save_token_info_as_csv(Dirs, cfg['leafmachine']['LLM_version'], path_api_cost, total_tokens_in, total_tokens_out, n_images)
     logger.info(f"[Total elapsed time] {round((t_overall_s - t_overall)/60)} minutes")
     space_saver(cfg, Dirs, logger)
+    if is_real_run:
+        progress_report.update_overall(f"Run Complete! :sunglasses:")
     for handler in logger.handlers[:]:
         handler.close()
         logger.removeHandler(handler)