phyloforfun committed on
Commit
806953a
1 Parent(s): cea3abb

Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing

Browse files
app.py CHANGED
@@ -10,12 +10,12 @@ from streamlit_extras.let_it_rain import rain
10
  from annotated_text import annotated_text
11
 
12
  from vouchervision.LeafMachine2_Config_Builder import write_config_file
13
- from vouchervision.VoucherVision_Config_Builder import build_VV_config, run_demo_tests_GPT, run_demo_tests_Palm , TestOptionsGPT, TestOptionsPalm, check_if_usable, run_api_tests
14
- from vouchervision.vouchervision_main import voucher_vision, voucher_vision_OCR_test
15
- from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, create_google_ocr_yaml_config, validate_dir
16
  from vouchervision.model_maps import ModelMaps
17
  from vouchervision.API_validation import APIvalidation
18
- from vouchervision.utils_hf import upload_to_drive, image_to_base64, setup_streamlit_config, save_uploaded_file, check_prompt_yaml_filename, save_uploaded_local
19
 
20
 
21
 
@@ -27,7 +27,7 @@ st.set_page_config(layout="wide", page_icon='img/icon.ico', page_title='VoucherV
27
 
28
  # Parse the 'is_hf' argument and set it in session state
29
  if 'is_hf' not in st.session_state:
30
- st.session_state['is_hf'] = False
31
 
32
 
33
  ########################################################################################################
 
10
  from annotated_text import annotated_text
11
 
12
  from vouchervision.LeafMachine2_Config_Builder import write_config_file
13
+ from vouchervision.VoucherVision_Config_Builder import build_VV_config, TestOptionsGPT, TestOptionsPalm, check_if_usable
14
+ from vouchervision.vouchervision_main import voucher_vision
15
+ from vouchervision.general_utils import test_GPU, get_cfg_from_full_path, summarize_expense_report, validate_dir
16
  from vouchervision.model_maps import ModelMaps
17
  from vouchervision.API_validation import APIvalidation
18
+ from vouchervision.utils_hf import setup_streamlit_config, save_uploaded_file, check_prompt_yaml_filename, save_uploaded_local
19
 
20
 
21
 
 
27
 
28
  # Parse the 'is_hf' argument and set it in session state
29
  if 'is_hf' not in st.session_state:
30
+ st.session_state['is_hf'] = True
31
 
32
 
33
  ########################################################################################################
vouchervision/LLM_GoogleGemini.py CHANGED
@@ -3,10 +3,10 @@ import vertexai
3
  from vertexai.preview.generative_models import GenerativeModel
4
  from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
5
  from langchain.output_parsers import RetryWithErrorOutputParser
6
- from langchain.schema import HumanMessage
7
  from langchain.prompts import PromptTemplate
8
  from langchain_core.output_parsers import JsonOutputParser
9
- from langchain_google_genai import ChatGoogleGenerativeAI
10
  from langchain_google_vertexai import VertexAI
11
 
12
  from vouchervision.utils_LLM import SystemLoadMonitor, count_tokens, save_individual_prompt
 
3
  from vertexai.preview.generative_models import GenerativeModel
4
  from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
5
  from langchain.output_parsers import RetryWithErrorOutputParser
6
+ # from langchain.schema import HumanMessage
7
  from langchain.prompts import PromptTemplate
8
  from langchain_core.output_parsers import JsonOutputParser
9
+ # from langchain_google_genai import ChatGoogleGenerativeAI
10
  from langchain_google_vertexai import VertexAI
11
 
12
  from vouchervision.utils_LLM import SystemLoadMonitor, count_tokens, save_individual_prompt
vouchervision/LLM_GooglePalm2.py CHANGED
@@ -1,14 +1,14 @@
1
  import os, time, json
2
- import vertexai
3
  from vertexai.language_models import TextGenerationModel
4
  from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
5
  from vertexai.language_models import TextGenerationModel
6
- from vertexai.preview.generative_models import GenerativeModel
7
  from langchain.output_parsers import RetryWithErrorOutputParser
8
- from langchain.schema import HumanMessage
9
  from langchain.prompts import PromptTemplate
10
  from langchain_core.output_parsers import JsonOutputParser
11
- from langchain_google_genai import ChatGoogleGenerativeAI
12
  from langchain_google_vertexai import VertexAI
13
 
14
  from vouchervision.utils_LLM import SystemLoadMonitor, count_tokens, save_individual_prompt
 
1
  import os, time, json
2
+ # import vertexai
3
  from vertexai.language_models import TextGenerationModel
4
  from vertexai.generative_models._generative_models import HarmCategory, HarmBlockThreshold
5
  from vertexai.language_models import TextGenerationModel
6
+ # from vertexai.preview.generative_models import GenerativeModel
7
  from langchain.output_parsers import RetryWithErrorOutputParser
8
+ # from langchain.schema import HumanMessage
9
  from langchain.prompts import PromptTemplate
10
  from langchain_core.output_parsers import JsonOutputParser
11
+ # from langchain_google_genai import ChatGoogleGenerativeAI
12
  from langchain_google_vertexai import VertexAI
13
 
14
  from vouchervision.utils_LLM import SystemLoadMonitor, count_tokens, save_individual_prompt
vouchervision/OCR_google_cloud_vision.py CHANGED
@@ -8,10 +8,10 @@ import colorsys
8
  from tqdm import tqdm
9
  from google.oauth2 import service_account
10
 
11
- currentdir = os.path.dirname(os.path.abspath(
12
- inspect.getfile(inspect.currentframe())))
13
- parentdir = os.path.dirname(currentdir)
14
- sys.path.append(parentdir)
15
 
16
 
17
  '''
 
8
  from tqdm import tqdm
9
  from google.oauth2 import service_account
10
 
11
+ # currentdir = os.path.dirname(os.path.abspath(
12
+ # inspect.getfile(inspect.currentframe())))
13
+ # parentdir = os.path.dirname(currentdir)
14
+ # sys.path.append(parentdir)
15
 
16
 
17
  '''
vouchervision/VoucherVision_Config_Builder.py CHANGED
@@ -1,7 +1,7 @@
1
- import os, yaml, platform, traceback
2
- from vouchervision.LeafMachine2_Config_Builder import get_default_download_folder, write_config_file
3
- from vouchervision.general_utils import validate_dir, print_main_fail
4
- from vouchervision.vouchervision_main import voucher_vision
5
  from general_utils import get_cfg_from_full_path
6
 
7
  def build_VV_config(loaded_cfg=None):
@@ -224,166 +224,166 @@ def assemble_config(dir_home, run_name, dir_images_local,dir_output,
224
 
225
  return config_data, dir_home
226
 
227
- def build_api_tests(api):
228
- dir_home = os.path.dirname(os.path.dirname(__file__))
229
- path_to_configs = os.path.join(dir_home,'demo','demo_configs')
230
 
231
- dir_home = os.path.dirname(os.path.dirname(__file__))
232
- dir_images_local = os.path.join(dir_home,'demo','demo_images')
233
- validate_dir(os.path.join(dir_home,'demo','demo_configs'))
234
- path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
235
- embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
236
- prefix_removal = ''
237
- suffix_removal = ''
238
- catalog_numerical_only = False
239
- batch_size = 500
240
- do_create_OCR_helper_image = False
241
 
242
 
243
- # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
244
- # LLM_version_user = 'Azure GPT 4'
245
 
246
- # ### Option 2: False of [False, True]
247
- # use_LeafMachine2_collage_images = False
248
 
249
- # ### Option 3: False of [False, True]
250
- # use_domain_knowledge = True
251
-
252
- test_results = {}
253
- if api == 'openai':
254
- OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
255
- elif api == 'palm':
256
- OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
257
- elif api == 'azure_openai':
258
- OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
259
- else:
260
- raise
261
-
262
- ind = -1
263
- ind_opt1 = -1
264
- ind_opt2 = -1
265
- ind_opt3 = -1
266
-
267
- for opt1 in OPT1:
268
- ind_opt1+= 1
269
- for opt2 in OPT2:
270
- ind_opt2 += 1
271
- for opt3 in OPT3:
272
- ind += 1
273
- ind_opt3 += 1
274
 
275
- LLM_version_user = opt1
276
- use_LeafMachine2_collage_images = opt2
277
- prompt_version = opt3
278
 
279
- filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
280
- run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"
281
 
282
- dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
283
- validate_dir(dir_output)
284
 
285
- config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
286
- prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
287
- path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
288
- prompt_version,do_create_OCR_helper_image)
289
 
290
- write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
291
 
292
- test_results[run_name] = False
293
- ind_opt3 = -1
294
- ind_opt2 = -1
295
- ind_opt1 = -1
296
 
297
- return dir_home, path_to_configs, test_results
298
-
299
- def build_demo_tests(llm_version):
300
- dir_home = os.path.dirname(os.path.dirname(__file__))
301
- path_to_configs = os.path.join(dir_home,'demo','demo_configs')
302
-
303
- dir_home = os.path.dirname(os.path.dirname(__file__))
304
- dir_images_local = os.path.join(dir_home,'demo','demo_images')
305
- validate_dir(os.path.join(dir_home,'demo','demo_configs'))
306
- path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
307
- embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
308
- prefix_removal = ''
309
- suffix_removal = ''
310
- catalog_numerical_only = False
311
- batch_size = 500
312
- do_create_OCR_helper_image = False
313
-
314
- # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
315
- # LLM_version_user = 'Azure GPT 4'
316
 
317
- # ### Option 2: False of [False, True]
318
- # use_LeafMachine2_collage_images = False
319
 
320
- # ### Option 3: False of [False, True]
321
- # use_domain_knowledge = True
322
-
323
- test_results = {}
324
- if llm_version == 'gpt':
325
- OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
326
- elif llm_version == 'palm':
327
- OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
328
- else:
329
- raise
330
-
331
- ind = -1
332
- ind_opt1 = -1
333
- ind_opt2 = -1
334
- ind_opt3 = -1
335
-
336
- for opt1 in OPT1:
337
- ind_opt1+= 1
338
- for opt2 in OPT2:
339
- ind_opt2 += 1
340
- for opt3 in OPT3:
341
- ind += 1
342
- ind_opt3 += 1
343
 
344
- LLM_version_user = opt1
345
- use_LeafMachine2_collage_images = opt2
346
- prompt_version = opt3
347
 
348
- filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
349
- run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"
350
 
351
- dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
352
- validate_dir(dir_output)
353
 
354
 
355
- if llm_version == 'gpt':
356
- if prompt_version in ['Version 1']:
357
- config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
358
- prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
359
- path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
360
- prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
361
- else:
362
- config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
363
- prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
364
- path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
365
- prompt_version, do_create_OCR_helper_image)
366
- elif llm_version == 'palm':
367
- if prompt_version in ['Version 1 PaLM 2']:
368
- config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
369
- prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
370
- path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
371
- prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
372
- else:
373
- config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
374
- prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
375
- path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
376
- prompt_version, do_create_OCR_helper_image)
377
 
378
 
379
- write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
380
 
381
- test_results[run_name] = False
382
- ind_opt3 = -1
383
- ind_opt2 = -1
384
- ind_opt1 = -1
385
 
386
- return dir_home, path_to_configs, test_results
387
 
388
  class TestOptionsGPT:
389
  OPT1 = ["gpt-4-1106-preview","GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5"]
@@ -445,138 +445,138 @@ class TestOptionsAPI_palm:
445
  def get_length(cls):
446
  return 6
447
 
448
- def run_demo_tests_GPT(progress_report):
449
- dir_home, path_to_configs, test_results = build_demo_tests('gpt')
450
- progress_report.set_n_overall(len(test_results.items()))
451
 
452
- JSON_results = {}
453
 
454
- for ind, (cfg, result) in enumerate(test_results.items()):
455
- OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
456
 
457
- test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
458
- opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
459
-
460
- if opt1_readable in ["Azure GPT 4", "Azure GPT 3.5"]:
461
- api_version = 'gpt-azure'
462
- elif opt1_readable in ["GPT 4", "GPT 3.5"]:
463
- api_version = 'gpt'
464
- else:
465
- raise
466
-
467
- opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
468
- opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
469
- # Construct the human-readable test name
470
- human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
471
- get_n_overall = progress_report.get_n_overall()
472
- progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
473
- print_main_fail(f"Starting validation test: {human_readable_name}")
474
- cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
475
 
476
- if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr'):
477
- try:
478
- last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, progress_report=progress_report, test_ind=int(test_ind))
479
- test_results[cfg] = True
480
- JSON_results[ind] = last_JSON_response
481
- except Exception as e:
482
- JSON_results[ind] = None
483
- test_results[cfg] = False
484
- print(f"An exception occurred: {e}")
485
- traceback.print_exc() # This will print the full traceback
486
- else:
487
- fail_response = ''
488
- if not check_API_key(dir_home, 'google-vision-ocr'):
489
- fail_response += "No API key found for Google Vision OCR"
490
- if not check_API_key(dir_home, api_version):
491
- fail_response += f" + No API key found for {api_version}"
492
- test_results[cfg] = False
493
- JSON_results[ind] = fail_response
494
- print(f"No API key found for {fail_response}")
495
 
496
- return test_results, JSON_results
497
-
498
- def run_demo_tests_Palm(progress_report):
499
- api_version = 'palm'
500
-
501
- dir_home, path_to_configs, test_results = build_demo_tests('palm')
502
- progress_report.set_n_overall(len(test_results.items()))
503
-
504
- JSON_results = {}
505
-
506
- for ind, (cfg, result) in enumerate(test_results.items()):
507
- OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
508
- test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
509
- opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
510
- opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
511
- opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
512
- # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
513
- # Construct the human-readable test name
514
- human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
515
- get_n_overall = progress_report.get_n_overall()
516
- progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
517
- print_main_fail(f"Starting validation test: {human_readable_name}")
518
- cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
519
 
520
- if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr') :
521
- try:
522
- last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, path_custom_prompts=None, progress_report=progress_report, test_ind=int(test_ind))
523
- test_results[cfg] = True
524
- JSON_results[ind] = last_JSON_response
525
- except Exception as e:
526
- test_results[cfg] = False
527
- JSON_results[ind] = None
528
- print(f"An exception occurred: {e}")
529
- traceback.print_exc() # This will print the full traceback
530
- else:
531
- fail_response = ''
532
- if not check_API_key(dir_home, 'google-vision-ocr'):
533
- fail_response += "No API key found for Google Vision OCR"
534
- if not check_API_key(dir_home, api_version):
535
- fail_response += f" + No API key found for {api_version}"
536
- test_results[cfg] = False
537
- JSON_results[ind] = fail_response
538
- print(f"No API key found for {fail_response}")
539
-
540
- return test_results, JSON_results
541
-
542
- def run_api_tests(api):
543
- try:
544
- dir_home, path_to_configs, test_results = build_api_tests(api)
545
-
546
- JSON_results = {}
547
-
548
- for ind, (cfg, result) in enumerate(test_results.items()):
549
- if api == 'openai':
550
- OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
551
- elif 'azure_openai':
552
- OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
553
- elif 'palm':
554
- OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
555
- test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
556
- opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
557
- opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
558
- opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
559
- # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
560
- # Construct the human-readable test name
561
- human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
562
- print_main_fail(f"Starting validation test: {human_readable_name}")
563
- cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
564
 
565
- if check_API_key(dir_home, api) and check_API_key(dir_home, 'google-vision-ocr') :
566
- try:
567
- last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, None,path_custom_prompts=None , cfg_test=None, progress_report=None, test_ind=int(test_ind))
568
- test_results[cfg] = True
569
- JSON_results[ind] = last_JSON_response
570
- return True
571
-
572
- except Exception as e:
573
- print(e)
574
- return False
575
- else:
576
- return False
577
- except Exception as e:
578
- print(e)
579
- return False
580
 
581
  def has_API_key(val):
582
  if val != '':
@@ -606,24 +606,24 @@ def check_if_usable(is_hf): ####################################################
606
  else:
607
  return False
608
 
609
- def check_API_key(dir_home, api_version):
610
- dir_home = os.path.dirname(os.path.dirname(__file__))
611
- path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
612
- cfg_private = get_cfg_from_full_path(path_cfg_private)
613
 
614
- has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
615
 
616
- has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
617
 
618
- # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
619
 
620
- has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])
621
 
622
- if api_version in ['gpt','openai'] and has_key_openai:
623
- return True
624
- elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
625
- return True
626
- elif api_version == 'google-vision-ocr' and has_key_google_OCR:
627
- return True
628
- else:
629
- return False
 
1
+ import os #, yaml, platform, traceback
2
+ from vouchervision.LeafMachine2_Config_Builder import get_default_download_folder #, write_config_file
3
+ # from vouchervision.general_utils import validate_dir, print_main_fail
4
+ # from vouchervision.vouchervision_main import voucher_vision
5
  from general_utils import get_cfg_from_full_path
6
 
7
  def build_VV_config(loaded_cfg=None):
 
224
 
225
  return config_data, dir_home
226
 
227
+ # def build_api_tests(api):
228
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
229
+ # path_to_configs = os.path.join(dir_home,'demo','demo_configs')
230
 
231
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
232
+ # dir_images_local = os.path.join(dir_home,'demo','demo_images')
233
+ # validate_dir(os.path.join(dir_home,'demo','demo_configs'))
234
+ # path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
235
+ # embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
236
+ # prefix_removal = ''
237
+ # suffix_removal = ''
238
+ # catalog_numerical_only = False
239
+ # batch_size = 500
240
+ # do_create_OCR_helper_image = False
241
 
242
 
243
+ # # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
244
+ # # LLM_version_user = 'Azure GPT 4'
245
 
246
+ # # ### Option 2: False of [False, True]
247
+ # # use_LeafMachine2_collage_images = False
248
 
249
+ # # ### Option 3: False of [False, True]
250
+ # # use_domain_knowledge = True
251
+
252
+ # test_results = {}
253
+ # if api == 'openai':
254
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
255
+ # elif api == 'palm':
256
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
257
+ # elif api == 'azure_openai':
258
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
259
+ # else:
260
+ # raise
261
+
262
+ # ind = -1
263
+ # ind_opt1 = -1
264
+ # ind_opt2 = -1
265
+ # ind_opt3 = -1
266
+
267
+ # for opt1 in OPT1:
268
+ # ind_opt1+= 1
269
+ # for opt2 in OPT2:
270
+ # ind_opt2 += 1
271
+ # for opt3 in OPT3:
272
+ # ind += 1
273
+ # ind_opt3 += 1
274
 
275
+ # LLM_version_user = opt1
276
+ # use_LeafMachine2_collage_images = opt2
277
+ # prompt_version = opt3
278
 
279
+ # filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
280
+ # run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"
281
 
282
+ # dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
283
+ # validate_dir(dir_output)
284
 
285
+ # config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
286
+ # prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
287
+ # path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
288
+ # prompt_version,do_create_OCR_helper_image)
289
 
290
+ # write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
291
 
292
+ # test_results[run_name] = False
293
+ # ind_opt3 = -1
294
+ # ind_opt2 = -1
295
+ # ind_opt1 = -1
296
 
297
+ # return dir_home, path_to_configs, test_results
298
+
299
+ # def build_demo_tests(llm_version):
300
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
301
+ # path_to_configs = os.path.join(dir_home,'demo','demo_configs')
302
+
303
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
304
+ # dir_images_local = os.path.join(dir_home,'demo','demo_images')
305
+ # validate_dir(os.path.join(dir_home,'demo','demo_configs'))
306
+ # path_domain_knowledge = os.path.join(dir_home,'domain_knowledge','SLTP_UM_AllAsiaMinimalInRegion.xlsx')
307
+ # embeddings_database_name = os.path.splitext(os.path.basename(path_domain_knowledge))[0]
308
+ # prefix_removal = ''
309
+ # suffix_removal = ''
310
+ # catalog_numerical_only = False
311
+ # batch_size = 500
312
+ # do_create_OCR_helper_image = False
313
+
314
+ # # ### Option 1: "GPT 4" of ["GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5", "PaLM 2"]
315
+ # # LLM_version_user = 'Azure GPT 4'
316
 
317
+ # # ### Option 2: False of [False, True]
318
+ # # use_LeafMachine2_collage_images = False
319
 
320
+ # # ### Option 3: False of [False, True]
321
+ # # use_domain_knowledge = True
322
+
323
+ # test_results = {}
324
+ # if llm_version == 'gpt':
325
+ # OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
326
+ # elif llm_version == 'palm':
327
+ # OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
328
+ # else:
329
+ # raise
330
+
331
+ # ind = -1
332
+ # ind_opt1 = -1
333
+ # ind_opt2 = -1
334
+ # ind_opt3 = -1
335
+
336
+ # for opt1 in OPT1:
337
+ # ind_opt1+= 1
338
+ # for opt2 in OPT2:
339
+ # ind_opt2 += 1
340
+ # for opt3 in OPT3:
341
+ # ind += 1
342
+ # ind_opt3 += 1
343
 
344
+ # LLM_version_user = opt1
345
+ # use_LeafMachine2_collage_images = opt2
346
+ # prompt_version = opt3
347
 
348
+ # filename = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}.yaml"
349
+ # run_name = f"{ind}__OPT1-{ind_opt1}__OPT2-{ind_opt2}__OPT3-{ind_opt3}"
350
 
351
+ # dir_output = os.path.join(dir_home,'demo','demo_output','run_name')
352
+ # validate_dir(dir_output)
353
 
354
 
355
+ # if llm_version == 'gpt':
356
+ # if prompt_version in ['Version 1']:
357
+ # config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
358
+ # prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
359
+ # path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
360
+ # prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
361
+ # else:
362
+ # config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
363
+ # prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
364
+ # path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
365
+ # prompt_version, do_create_OCR_helper_image)
366
+ # elif llm_version == 'palm':
367
+ # if prompt_version in ['Version 1 PaLM 2']:
368
+ # config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
369
+ # prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
370
+ # path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
371
+ # prompt_version, do_create_OCR_helper_image, use_domain_knowledge=True)
372
+ # else:
373
+ # config_data, dir_home = assemble_config(dir_home, run_name, dir_images_local,dir_output,
374
+ # prefix_removal,suffix_removal,catalog_numerical_only,LLM_version_user,batch_size,
375
+ # path_domain_knowledge,embeddings_database_name,use_LeafMachine2_collage_images,
376
+ # prompt_version, do_create_OCR_helper_image)
377
 
378
 
379
+ # write_config_file(config_data, os.path.join(dir_home,'demo','demo_configs'),filename=filename)
380
 
381
+ # test_results[run_name] = False
382
+ # ind_opt3 = -1
383
+ # ind_opt2 = -1
384
+ # ind_opt1 = -1
385
 
386
+ # return dir_home, path_to_configs, test_results
387
 
388
  class TestOptionsGPT:
389
  OPT1 = ["gpt-4-1106-preview","GPT 4", "GPT 3.5", "Azure GPT 4", "Azure GPT 3.5"]
 
445
  def get_length(cls):
446
  return 6
447
 
448
+ # def run_demo_tests_GPT(progress_report):
449
+ # dir_home, path_to_configs, test_results = build_demo_tests('gpt')
450
+ # progress_report.set_n_overall(len(test_results.items()))
451
 
452
+ # JSON_results = {}
453
 
454
+ # for ind, (cfg, result) in enumerate(test_results.items()):
455
+ # OPT1, OPT2, OPT3 = TestOptionsGPT.get_options()
456
 
457
+ # test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
458
+ # opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
459
+
460
+ # if opt1_readable in ["Azure GPT 4", "Azure GPT 3.5"]:
461
+ # api_version = 'gpt-azure'
462
+ # elif opt1_readable in ["GPT 4", "GPT 3.5"]:
463
+ # api_version = 'gpt'
464
+ # else:
465
+ # raise
466
+
467
+ # opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
468
+ # opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
469
+ # # Construct the human-readable test name
470
+ # human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
471
+ # get_n_overall = progress_report.get_n_overall()
472
+ # progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
473
+ # print_main_fail(f"Starting validation test: {human_readable_name}")
474
+ # cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
475
 
476
+ # if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr'):
477
+ # try:
478
+ # last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, progress_report=progress_report, test_ind=int(test_ind))
479
+ # test_results[cfg] = True
480
+ # JSON_results[ind] = last_JSON_response
481
+ # except Exception as e:
482
+ # JSON_results[ind] = None
483
+ # test_results[cfg] = False
484
+ # print(f"An exception occurred: {e}")
485
+ # traceback.print_exc() # This will print the full traceback
486
+ # else:
487
+ # fail_response = ''
488
+ # if not check_API_key(dir_home, 'google-vision-ocr'):
489
+ # fail_response += "No API key found for Google Vision OCR"
490
+ # if not check_API_key(dir_home, api_version):
491
+ # fail_response += f" + No API key found for {api_version}"
492
+ # test_results[cfg] = False
493
+ # JSON_results[ind] = fail_response
494
+ # print(f"No API key found for {fail_response}")
495
 
496
+ # return test_results, JSON_results
497
+
498
+ # def run_demo_tests_Palm(progress_report):
499
+ # api_version = 'palm'
500
+
501
+ # dir_home, path_to_configs, test_results = build_demo_tests('palm')
502
+ # progress_report.set_n_overall(len(test_results.items()))
503
+
504
+ # JSON_results = {}
505
+
506
+ # for ind, (cfg, result) in enumerate(test_results.items()):
507
+ # OPT1, OPT2, OPT3 = TestOptionsPalm.get_options()
508
+ # test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
509
+ # opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
510
+ # opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
511
+ # opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
512
+ # # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
513
+ # # Construct the human-readable test name
514
+ # human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
515
+ # get_n_overall = progress_report.get_n_overall()
516
+ # progress_report.update_overall(f"Test {int(test_ind)+1} of {get_n_overall} --- Validating {human_readable_name}")
517
+ # print_main_fail(f"Starting validation test: {human_readable_name}")
518
+ # cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
519
 
520
+ # if check_API_key(dir_home, api_version) and check_API_key(dir_home, 'google-vision-ocr') :
521
+ # try:
522
+ # last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, cfg_test=None, path_custom_prompts=None, progress_report=progress_report, test_ind=int(test_ind))
523
+ # test_results[cfg] = True
524
+ # JSON_results[ind] = last_JSON_response
525
+ # except Exception as e:
526
+ # test_results[cfg] = False
527
+ # JSON_results[ind] = None
528
+ # print(f"An exception occurred: {e}")
529
+ # traceback.print_exc() # This will print the full traceback
530
+ # else:
531
+ # fail_response = ''
532
+ # if not check_API_key(dir_home, 'google-vision-ocr'):
533
+ # fail_response += "No API key found for Google Vision OCR"
534
+ # if not check_API_key(dir_home, api_version):
535
+ # fail_response += f" + No API key found for {api_version}"
536
+ # test_results[cfg] = False
537
+ # JSON_results[ind] = fail_response
538
+ # print(f"No API key found for {fail_response}")
539
+
540
+ # return test_results, JSON_results
541
+
542
+ # def run_api_tests(api):
543
+ # try:
544
+ # dir_home, path_to_configs, test_results = build_api_tests(api)
545
+
546
+ # JSON_results = {}
547
+
548
+ # for ind, (cfg, result) in enumerate(test_results.items()):
549
+ # if api == 'openai':
550
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_openai.get_options()
551
+ # elif 'azure_openai':
552
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_azure_openai.get_options()
553
+ # elif 'palm':
554
+ # OPT1, OPT2, OPT3 = TestOptionsAPI_palm.get_options()
555
+ # test_ind, ind_opt1, ind_opt2, ind_opt3 = cfg.split('__')
556
+ # opt1_readable = OPT1[int(ind_opt1.split('-')[1])]
557
+ # opt2_readable = "Use LeafMachine2 for Collage Images" if OPT2[int(ind_opt2.split('-')[1])] else "Don't use LeafMachine2 for Collage Images"
558
+ # opt3_readable = f"Prompt {OPT3[int(ind_opt3.split('-')[1])]}"
559
+ # # opt3_readable = "Use Domain Knowledge" if OPT3[int(ind_opt3.split('-')[1])] else "Don't use Domain Knowledge"
560
+ # # Construct the human-readable test name
561
+ # human_readable_name = f"{opt1_readable}, {opt2_readable}, {opt3_readable}"
562
+ # print_main_fail(f"Starting validation test: {human_readable_name}")
563
+ # cfg_file_path = os.path.join(path_to_configs,'.'.join([cfg,'yaml']))
564
 
565
+ # if check_API_key(dir_home, api) and check_API_key(dir_home, 'google-vision-ocr') :
566
+ # try:
567
+ # last_JSON_response, total_cost = voucher_vision(cfg_file_path, dir_home, None,path_custom_prompts=None , cfg_test=None, progress_report=None, test_ind=int(test_ind))
568
+ # test_results[cfg] = True
569
+ # JSON_results[ind] = last_JSON_response
570
+ # return True
571
+
572
+ # except Exception as e:
573
+ # print(e)
574
+ # return False
575
+ # else:
576
+ # return False
577
+ # except Exception as e:
578
+ # print(e)
579
+ # return False
580
 
581
  def has_API_key(val):
582
  if val != '':
 
606
  else:
607
  return False
608
 
609
+ # def check_API_key(dir_home, api_version):
610
+ # dir_home = os.path.dirname(os.path.dirname(__file__))
611
+ # path_cfg_private = os.path.join(dir_home, 'PRIVATE_DATA.yaml')
612
+ # cfg_private = get_cfg_from_full_path(path_cfg_private)
613
 
614
+ # has_key_openai = has_API_key(cfg_private['openai']['OPENAI_API_KEY'])
615
 
616
+ # has_key_azure_openai = has_API_key(cfg_private['openai_azure']['api_version'])
617
 
618
+ # # has_key_palm2 = has_API_key(cfg_private['google_palm']['google_palm_api'])
619
 
620
+ # has_key_google_OCR = has_API_key(cfg_private['google']['GOOGLE_APPLICATION_CREDENTIALS'])
621
 
622
+ # if api_version in ['gpt','openai'] and has_key_openai:
623
+ # return True
624
+ # elif api_version in ['gpt-azure', 'azure_openai'] and has_key_azure_openai:
625
+ # return True
626
+ # elif api_version == 'google-vision-ocr' and has_key_google_OCR:
627
+ # return True
628
+ # else:
629
+ # return False
vouchervision/directory_structure_VV.py CHANGED
@@ -1,9 +1,9 @@
1
- import os, pathlib, sys, inspect
2
- from dataclasses import dataclass, field
3
- currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
4
- parentdir = os.path.dirname(currentdir)
5
- sys.path.append(parentdir)
6
- sys.path.append(currentdir)
7
  from vouchervision.general_utils import validate_dir, get_datetime
8
 
9
  @dataclass
 
1
+ import os, pathlib #, sys, inspect
2
+ from dataclasses import dataclass
3
+ # currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
4
+ # parentdir = os.path.dirname(currentdir)
5
+ # sys.path.append(parentdir)
6
+ # sys.path.append(currentdir)
7
  from vouchervision.general_utils import validate_dir, get_datetime
8
 
9
  @dataclass
vouchervision/model_maps.py CHANGED
@@ -84,7 +84,7 @@ class ModelMaps:
84
  }
85
 
86
  @classmethod
87
- def get_version_has_key(cls, key, has_key_openai, has_key_azure_openai, has_key_palm2, has_key_mistral):
88
  # Define the mapping for 'has_key' values
89
  version_has_key = {
90
  'GPT 4 Turbo 1106-preview': has_key_openai,
@@ -99,10 +99,10 @@ class ModelMaps:
99
  'Azure GPT 4 Turbo 1106-preview': has_key_azure_openai,
100
  'Azure GPT 4 32k': has_key_azure_openai,
101
 
102
- 'PaLM 2 text-bison@001': has_key_palm2,
103
- 'PaLM 2 text-bison@002': has_key_palm2,
104
- 'PaLM 2 text-unicorn@001': has_key_palm2,
105
- 'Gemini Pro': has_key_palm2,
106
 
107
  'Mistral Tiny': has_key_mistral,
108
  'Mistral Small': has_key_mistral,
 
84
  }
85
 
86
  @classmethod
87
+ def get_version_has_key(cls, key, has_key_openai, has_key_azure_openai, has_key_google_application_credentials, has_key_mistral):
88
  # Define the mapping for 'has_key' values
89
  version_has_key = {
90
  'GPT 4 Turbo 1106-preview': has_key_openai,
 
99
  'Azure GPT 4 Turbo 1106-preview': has_key_azure_openai,
100
  'Azure GPT 4 32k': has_key_azure_openai,
101
 
102
+ 'PaLM 2 text-bison@001': has_key_google_application_credentials,
103
+ 'PaLM 2 text-bison@002': has_key_google_application_credentials,
104
+ 'PaLM 2 text-unicorn@001': has_key_google_application_credentials,
105
+ 'Gemini Pro': has_key_google_application_credentials,
106
 
107
  'Mistral Tiny': has_key_mistral,
108
  'Mistral Small': has_key_mistral,
vouchervision/tool_wikipedia.py CHANGED
@@ -1,4 +1,4 @@
1
- import itertools, yaml,wikipediaapi, requests, re, json
2
  from langchain_community.tools import WikipediaQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper
4
  # from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
 
1
+ import itertools, wikipediaapi, requests, re, json
2
  from langchain_community.tools import WikipediaQueryRun
3
  from langchain_community.utilities import WikipediaAPIWrapper
4
  # from langchain_community.tools.wikidata.tool import WikidataAPIWrapper, WikidataQueryRun
vouchervision/utils_VoucherVision.py CHANGED
@@ -1,14 +1,14 @@
1
  import openai
2
- import os, json, glob, shutil, yaml, torch, logging, tempfile
3
  import openpyxl
4
  from openpyxl import Workbook, load_workbook
5
  import vertexai
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
  from langchain_openai import AzureChatOpenAI
8
  from OCR_google_cloud_vision import OCRGoogle
9
- import google.generativeai as genai
10
  from google.oauth2 import service_account
11
- from googleapiclient.discovery import build
12
 
13
  from vouchervision.LLM_OpenAI import OpenAIHandler
14
  from vouchervision.LLM_GooglePalm2 import GooglePalm2Handler
@@ -883,10 +883,10 @@ class VoucherVision():
883
  self.logger.removeHandler(handler)
884
 
885
 
886
- def process_specimen_batch_OCR_test(self, path_to_crop):
887
- for img_filename in os.listdir(path_to_crop):
888
- img_path = os.path.join(path_to_crop, img_filename)
889
- self.OCR, self.bounds, self.text_to_box_mapping = detect_text(img_path)
890
 
891
 
892
 
 
1
  import openai
2
+ import os, json, glob, shutil, yaml, torch, logging
3
  import openpyxl
4
  from openpyxl import Workbook, load_workbook
5
  import vertexai
6
  from transformers import TrOCRProcessor, VisionEncoderDecoderModel
7
  from langchain_openai import AzureChatOpenAI
8
  from OCR_google_cloud_vision import OCRGoogle
9
+ # import google.generativeai as genai
10
  from google.oauth2 import service_account
11
+ # from googleapiclient.discovery import build
12
 
13
  from vouchervision.LLM_OpenAI import OpenAIHandler
14
  from vouchervision.LLM_GooglePalm2 import GooglePalm2Handler
 
883
  self.logger.removeHandler(handler)
884
 
885
 
886
+ # def process_specimen_batch_OCR_test(self, path_to_crop):
887
+ # for img_filename in os.listdir(path_to_crop):
888
+ # img_path = os.path.join(path_to_crop, img_filename)
889
+ # self.OCR, self.bounds, self.text_to_box_mapping = detect_text(img_path)
890
 
891
 
892
 
vouchervision/utils_hf.py CHANGED
@@ -7,7 +7,7 @@ from PIL import Image
7
  from PIL import Image
8
  from io import BytesIO
9
 
10
- from vouchervision.general_utils import get_cfg_from_full_path
11
 
12
 
13
  def setup_streamlit_config(dir_home):
 
7
  from PIL import Image
8
  from io import BytesIO
9
 
10
+ # from vouchervision.general_utils import get_cfg_from_full_path
11
 
12
 
13
  def setup_streamlit_config(dir_home):
vouchervision/vouchervision_main.py CHANGED
@@ -1,14 +1,14 @@
1
  '''
2
  VoucherVision - based on LeafMachine2 Processes
3
  '''
4
- import os, inspect, sys, logging, subprocess, shutil
5
  from time import perf_counter
6
  currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
7
  parentdir = os.path.dirname(currentdir)
8
  sys.path.append(parentdir)
9
  sys.path.append(currentdir)
10
  from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
11
- from general_utils import add_to_expense_report, save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, subset_dir_images, crop_detections_from_images_VV
12
  from directory_structure_VV import Dir_Structure
13
  from data_project import Project_Info
14
  from LM2_logger import start_logging
 
1
  '''
2
  VoucherVision - based on LeafMachine2 Processes
3
  '''
4
+ import os, inspect, sys, shutil
5
  from time import perf_counter
6
  currentdir = os.path.dirname(os.path.dirname(inspect.getfile(inspect.currentframe())))
7
  parentdir = os.path.dirname(currentdir)
8
  sys.path.append(parentdir)
9
  sys.path.append(currentdir)
10
  from vouchervision.component_detector.component_detector import detect_plant_components, detect_archival_components
11
+ from general_utils import save_token_info_as_csv, print_main_start, check_for_subdirs_VV, load_config_file, load_config_file_testing, report_config, save_config_file, crop_detections_from_images_VV
12
  from directory_structure_VV import Dir_Structure
13
  from data_project import Project_Info
14
  from LM2_logger import start_logging