leoxiaobin committed
Commit 2ad8ae1
Parent: beec895

add caption + grounding tasks

Files changed (1): app.py (+30, -2)
app.py CHANGED
@@ -135,6 +135,33 @@ def process_image(image, task_prompt, text_input=None, model_id='microsoft/Flore
         task_prompt = '<MORE_DETAILED_CAPTION>'
         results = run_example(task_prompt, image, model_id=model_id)
         return results, None
+    elif task_prompt == 'Caption + Grounding':
+        task_prompt = '<CAPTION>'
+        results = run_example(task_prompt, image, model_id=model_id)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input, model_id)
+        results['<CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
+    elif task_prompt == 'Detailed Caption + Grounding':
+        task_prompt = '<DETAILED_CAPTION>'
+        results = run_example(task_prompt, image, model_id=model_id)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input, model_id)
+        results['<DETAILED_CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
+    elif task_prompt == 'More Detailed Caption + Grounding':
+        task_prompt = '<MORE_DETAILED_CAPTION>'
+        results = run_example(task_prompt, image, model_id=model_id)
+        text_input = results[task_prompt]
+        task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
+        results = run_example(task_prompt, image, text_input, model_id)
+        results['<MORE_DETAILED_CAPTION>'] = text_input
+        fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
+        return results, fig_to_pil(fig)
     elif task_prompt == 'Object Detection':
         task_prompt = '<OD>'
         results = run_example(task_prompt, image, model_id=model_id)
@@ -210,7 +237,8 @@ with gr.Blocks(css=css) as demo:
                 input_img = gr.Image(label="Input Picture")
                 model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value='microsoft/Florence-2-large')
                 task_prompt = gr.Dropdown(choices=[
-                    'Caption', 'Detailed Caption', 'More Detailed Caption', 'Object Detection',
+                    'Caption', 'Detailed Caption', 'More Detailed Caption', 'Caption + Grounding',
+                    'Detailed Caption + Grounding', 'More Detailed Caption + Grounding', 'Object Detection',
                     'Dense Region Caption', 'Region Proposal', 'Caption to Phrase Grounding',
                     'Referring Expression Segmentation', 'Region to Segmentation',
                     'Open Vocabulary Detection', 'Region to Category', 'Region to Description',
@@ -236,4 +264,4 @@ with gr.Blocks(css=css) as demo:
 
         submit_btn.click(process_image, [input_img, task_prompt, text_input, model_selector], [output_text, output_img])
 
-demo.launch(debug=True)
+demo.launch(debug=True)
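
The three new branches differ only in the caption task token; each runs a captioning pass, then feeds the resulting caption back in as the text input for '<CAPTION_TO_PHRASE_GROUNDING>'. Below is a minimal standalone sketch of that same chaining against Florence-2 directly, following the usage shown on the model card. It bypasses the app's run_example/plot_bbox helpers, omits device/dtype handling for brevity, and the florence2 helper and example.jpg path are illustrative, not part of app.py.

import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor

model_id = 'microsoft/Florence-2-large'
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True).eval()
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)

def florence2(image, task, text=''):
    # Illustrative helper, not a function in app.py; mirrors what run_example does.
    prompt = task + text  # grounding tasks append the text input to the task token
    inputs = processor(text=prompt, images=image, return_tensors="pt")
    with torch.no_grad():
        ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            num_beams=3,
        )
    raw = processor.batch_decode(ids, skip_special_tokens=False)[0]
    # post_process_generation parses the raw string into {task: parsed_output}
    return processor.post_process_generation(raw, task=task,
                                             image_size=(image.width, image.height))

image = Image.open('example.jpg')  # placeholder input image
caption = florence2(image, '<CAPTION>')['<CAPTION>']
grounding = florence2(image, '<CAPTION_TO_PHRASE_GROUNDING>', caption)
# grounding['<CAPTION_TO_PHRASE_GROUNDING>'] is a dict of 'bboxes' and 'labels',
# which app.py hands to plot_bbox for drawing.
print(caption)
print(grounding['<CAPTION_TO_PHRASE_GROUNDING>'])

Swapping '<CAPTION>' for '<DETAILED_CAPTION>' or '<MORE_DETAILED_CAPTION>' reproduces the other two combined tasks, which is why the three diff branches read nearly identically.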