RuntimeError: This event loop is already running

#20 · opened by assa8945
Running the standard lmdeploy example for InternVL2-8B inside a Jupyter notebook fails as soon as the pipeline is called:

```python
from lmdeploy import pipeline, TurbomindEngineConfig
from lmdeploy.vl import load_image

model = 'OpenGVLab/InternVL2-8B'
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
response = pipe(('describe this image', image))
print(response.text)
```

```
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
Cell In[1], line 7
      5 image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
      6 pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
----> 7 response = pipe(('describe this image', image))
      8 print(response.text)

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/site-packages/lmdeploy/serve/vl_async_engine.py:123, in VLAsyncEngine.__call__(self, prompts, **kwargs)
    121 """Inference a batch of prompts."""
    122 prompts = self._convert_prompts(prompts)
--> 123 return super().__call__(prompts, **kwargs)

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/site-packages/lmdeploy/serve/async_engine.py:305, in AsyncEngine.__call__(self, prompts, gen_config, request_output_len, top_k, top_p, temperature, repetition_penalty, ignore_eos, do_preprocess, adapter_name, use_tqdm, **kwargs)
    297 if gen_config is None:
    298     gen_config = GenerationConfig(
    299         max_new_tokens=request_output_len,
    300         top_k=top_k,
   (...)
    303         repetition_penalty=repetition_penalty,
    304         ignore_eos=ignore_eos)
--> 305 return self.batch_infer(prompts,
    306                         gen_config=gen_config,
    307                         do_preprocess=do_preprocess,
    308                         adapter_name=adapter_name,
    309                         use_tqdm=use_tqdm,
    310                         **kwargs)

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/site-packages/lmdeploy/serve/vl_async_engine.py:109, in VLAsyncEngine.batch_infer(self, prompts, **kwargs)
    107 """Inference a batch of prompts."""
    108 prompts = self._convert_prompts(prompts)
--> 109 return super().batch_infer(prompts, **kwargs)

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/site-packages/lmdeploy/serve/async_engine.py:429, in AsyncEngine.batch_infer(self, prompts, gen_config, do_preprocess, adapter_name, use_tqdm, **kwargs)
    425 async def gather():
    426     await asyncio.gather(
    427         *[_inner_call(i, generators[i]) for i in range(len(prompts))])
--> 429 _get_event_loop().run_until_complete(gather())
    430 outputs = outputs[0] if need_list_wrap else outputs
    431 return outputs

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/asyncio/base_events.py:592, in BaseEventLoop.run_until_complete(self, future)
    581 """Run until the Future is done.
    582 
    583 If the argument is a coroutine, it is wrapped in a Task.
   (...)
    589 Return the Future's result, or raise its exception.
    590 """
    591 self._check_closed()
--> 592 self._check_running()
    594 new_task = not futures.isfuture(future)
    595 future = tasks.ensure_future(future, loop=self)

File /scratch/xiangrui/conda_envs/sol_envs/diffusers/lib/python3.8/asyncio/base_events.py:552, in BaseEventLoop._check_running(self)
    550 def _check_running(self):
    551     if self.is_running():
--> 552         raise RuntimeError('This event loop is already running')
    553     if events._get_running_loop() is not None:
    554         raise RuntimeError(
    555             'Cannot run the event loop while another loop is running')

RuntimeError: This event loop is already running
```
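The traceback shows why this happens: `AsyncEngine.batch_infer` drives its coroutines with `_get_event_loop().run_until_complete(...)`, and a Jupyter/IPython kernel already has an asyncio event loop running, so `run_until_complete` refuses to start. A common workaround for notebook environments (a sketch, not an lmdeploy-specific fix; it assumes the third-party `nest_asyncio` package, installable with `pip install nest-asyncio`) is to patch the running loop before creating the pipeline:

```python
# Sketch of a notebook workaround, assuming the third-party
# nest_asyncio package (pip install nest-asyncio).
import nest_asyncio

# Patch the already-running Jupyter event loop so that nested
# run_until_complete() calls (like the one in batch_infer) are allowed.
nest_asyncio.apply()

from lmdeploy import pipeline, TurbomindEngineConfig
from lmdeploy.vl import load_image

model = 'OpenGVLab/InternVL2-8B'
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
pipe = pipeline(model, backend_config=TurbomindEngineConfig(session_len=8192))
response = pipe(('describe this image', image))
print(response.text)
```

Alternatively, running the same code as a plain script (`python demo.py`) avoids the error entirely, since no event loop is running when `batch_infer` starts.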
