# VoucherVision / vouchervision / llava_test.py
# Uploaded by: phyloforfun
# Commit message: Major update. Support for 15 LLMs, World Flora Online taxonomy validation, geolocation, 2 OCR methods, significant UI changes, stability improvements, consistent JSON parsing
# Commit: 524a99c
from LLaVA.llava.model.builder import load_pretrained_model
from LLaVA.llava.mm_utils import get_model_name_from_path
from LLaVA.llava.eval.run_llava import eval_model
# Manual LLaVA check: build an argument object and hand it to LLaVA's
# ``eval_model`` with a prompt asking for a JSON transcription of the text
# in a single image.
#
# Checkpoints that have been tried here (edit ``model_path`` to switch):
#   "liuhaotian/llava-v1.5-7b"
#   "liuhaotian/llava-v1.6-mistral-7b"
#   "liuhaotian/llava-v1.6-34b"        <- currently selected
#
# The model can also be loaded directly rather than through ``eval_model``:
#   tokenizer, model, image_processor, context_len = load_pretrained_model(
#       model_path=model_path,
#       model_base=None,
#       model_name=get_model_name_from_path(model_path),
#   )

# Image to transcribe. This is a machine-specific absolute path; a remote
# sample that was used previously is
# "https://llava-vl.github.io/static/images/view.jpg".
image_file = "/home/brlab/Dropbox/VoucherVision/demo/demo_images/MICH_16205594_Poaceae_Jouvea_pilosa.jpg"

# Instruction sent to the model alongside the image.
prompt = """I need you to transcribe all of the text in this image. Place the transcribed text into a JSON dictionary with this form {"Transcription": "text"}"""

model_path = "liuhaotian/llava-v1.6-34b"

# ``eval_model`` is given an object with attributes, so a throwaway ``Args``
# class is created from a namespace dict and instantiated immediately.
args = type(
    "Args",
    (),
    dict(
        model_path=model_path,
        model_base=None,
        model_name=get_model_name_from_path(model_path),
        query=prompt,
        conv_mode=None,
        image_file=image_file,
        sep=",",
        temperature=0,
        top_p=None,
        num_beams=1,
        max_new_tokens=512,
        # load_8_bit=True,  # left disabled, as in the original settings
    ),
)()

eval_model(args)