Spaces:
Running
Running
File size: 2,562 Bytes
6985586 a01e989 d05a223 6668c84 6985586 bca94f8 6c67d85 bca94f8 d05a223 f64e1ed d05a223 bca94f8 a01e989 d05a223 6668c84 d05a223 a01e989 d05a223 a01e989 d05a223 a01e989 6c67d85 a01e989 d05a223 6c67d85 a01e989 d05a223 a01e989 d05a223 a01e989 6668c84 a01e989 6668c84 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import streamlit as st
from text2image import get_model, get_tokenizer, get_image_transform
from utils import text_encoder, image_encoder
from PIL import Image
from jax import numpy as jnp
import pandas as pd
import requests
import jax
import gc
def app():
st.title("From Image to Text")
st.markdown(
"""
### 👋 Ciao!
Here you can find the captions or the labels that are most related to a given image. It is a zero-shot
image classification task!
🤌 Italian mode on! 🤌
For example, try to write "gatto" (cat) in the space for label1 and "dog" (cane) in the space for label2 and the run
"classify"!
"""
)
image_url = st.text_input(
"You can input the URL of an image",
value="https://www.petdetective.it/wp-content/uploads/2016/04/gatto-toilette.jpg",
)
MAX_CAP = 4
col1, col2 = st.beta_columns([3, 1])
with col2:
captions_count = st.selectbox(
"Number of labels", options=range(1, MAX_CAP + 1), index=1
)
compute = st.button("Classify")
with col1:
captions = list()
for idx in range(min(MAX_CAP, captions_count)):
captions.append(st.text_input(f"Insert label {idx+1}"))
if compute:
captions = [c for c in captions if c != ""]
if not captions or not image_url:
st.error("Please choose one image and at least one label")
else:
with st.spinner("Computing..."):
model = get_model()
tokenizer = get_tokenizer()
text_embeds = list()
for i, c in enumerate(captions):
text_embeds.extend(text_encoder(c, model, tokenizer))
text_embeds = jnp.array(text_embeds)
image_raw = requests.get(image_url, stream=True).raw
image = Image.open(image_raw).convert("RGB")
transform = get_image_transform(model.config.vision_config.image_size)
image_embed = image_encoder(transform(image), model)
# we could have a softmax here
cos_similarities = jax.nn.softmax(
jnp.matmul(image_embed, text_embeds.T)
)
chart_data = pd.Series(cos_similarities[0], index=captions)
col1, col2 = st.beta_columns(2)
with col1:
st.bar_chart(chart_data)
with col2:
st.image(image)
gc.collect()
|