handsomeguy001 committed on
Commit
8723eb9
1 Parent(s): 83f5c2c

support litellm

mindsearch/agent/__init__.py CHANGED
@@ -17,13 +17,14 @@ from mindsearch.agent.mindsearch_prompt import (
 LLM = {}
 
 
-def init_agent(lang='cn', model_format='internlm_server',search_engine='DuckDuckGoSearch'):
+def init_agent(lang='cn', model_format='internlm_server', search_engine='DuckDuckGoSearch', **kwargs):
     llm = LLM.get(model_format, None)
     if llm is None:
         llm_cfg = getattr(llm_factory, model_format)
         if llm_cfg is None:
             raise NotImplementedError
         llm_cfg = llm_cfg.copy()
+        llm_cfg.update(kwargs)
         llm = llm_cfg.pop('type')(**llm_cfg)
         LLM[model_format] = llm
 
@@ -37,9 +38,9 @@ def init_agent(lang='cn', model_format='internlm_server',search_engine='DuckDuck
         llm=llm,
         protocol=MindSearchProtocol(meta_prompt=datetime.now().strftime(
             'The current date is %Y-%m-%d.'),
-                                    interpreter_prompt=interpreter_prompt,
-                                    response_prompt=FINAL_RESPONSE_CN
-                                    if lang == 'cn' else FINAL_RESPONSE_EN),
+            interpreter_prompt=interpreter_prompt,
+            response_prompt=FINAL_RESPONSE_CN
+            if lang == 'cn' else FINAL_RESPONSE_EN),
         searcher_cfg=dict(
             llm=llm,
             plugin_executor=ActionExecutor(
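
With the new `**kwargs` pass-through, anything extra handed to `init_agent` is merged into the selected model config before the class stored in its `type` field is instantiated. A minimal sketch of that call path follows; the `'deepseek/deepseek-chat'` id is only an illustrative litellm model name, not something this commit hard-codes.

```python
# Sketch: extra kwargs override/extend fields of the chosen llm config
# before `llm_cfg.pop('type')(**llm_cfg)` instantiates the model class.
# 'litellm_completion' is the config added in models.py below.
from mindsearch.agent import init_agent

agent = init_agent(lang='en',
                   model_format='litellm_completion',
                   search_engine='DuckDuckGoSearch',
                   model_name='deepseek/deepseek-chat')  # forwarded via **kwargs
```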
mindsearch/agent/models.py CHANGED
@@ -1,3 +1,9 @@
+from lagent.llms import BaseAPIModel
+from typing import List, Optional, Union
+from litellm import completion
+
+from lagent.schema import ModelStatusCode
+from lagent.utils.util import filter_suffix
 import os
 
 from lagent.llms import (GPTAPI, INTERNLM2_META, HFTransformerCasualLM,
@@ -38,7 +44,8 @@ internlm_hf = dict(type=HFTransformerCasualLM,
 gpt4 = dict(type=GPTAPI,
             model_type='gpt-4-turbo',
             key=os.environ.get('OPENAI_API_KEY', 'YOUR OPENAI API KEY'),
-            openai_api_base=os.environ.get('OPENAI_API_BASE', 'https://api.openai.com/v1/chat/completions'),
+            openai_api_base=os.environ.get(
+                'OPENAI_API_BASE', 'https://api.openai.com/v1/chat/completions'),
             )
 
 url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation'
@@ -61,7 +68,8 @@ qwen = dict(type=GPTAPI,
 
 internlm_silicon = dict(type=GPTAPI,
                         model_type='internlm/internlm2_5-7b-chat',
-                        key=os.environ.get('SILICON_API_KEY', 'YOUR SILICON API KEY'),
+                        key=os.environ.get(
+                            'SILICON_API_KEY', 'YOUR SILICON API KEY'),
                         openai_api_base='https://api.siliconflow.cn/v1/chat/completions',
                         meta_template=[
                             dict(role='system', api_role='system'),
@@ -75,3 +83,133 @@ internlm_silicon = dict(type=GPTAPI,
                         max_new_tokens=8192,
                         repetition_penalty=1.02,
                         stop_words=['<|im_end|>'])
+
+
+class litellmCompletion(BaseAPIModel):
+    """A lagent ``BaseAPIModel`` wrapper that routes chat completions
+    through litellm, so any provider/model that litellm supports can
+    back the agent.
+
+    Args:
+        path (str): unused placeholder, kept only to satisfy the
+            ``BaseAPIModel`` constructor.
+        model_name (str): a litellm model identifier, e.g. "command-r"
+            or "deepseek/deepseek-chat".
+    """
+
+    def __init__(self,
+                 path='',
+                 model_name="command-r",
+                 **kwargs):
+        self.model_name = model_name
+        super().__init__(path, **kwargs)
+
+    def generate(self,
+                 inputs: Union[str, List[str]],
+                 do_preprocess: bool = None,
+                 skip_special_tokens: bool = False,
+                 **kwargs):
+        """Return the chat completions in non-stream mode.
+
+        Args:
+            inputs (Union[str, List[str]]): input texts to be completed.
+            do_preprocess (bool): whether to pre-process the messages.
+                Defaults to True, which means chat_template will be applied.
+            skip_special_tokens (bool): whether to remove special tokens
+                in the decoding. Defaults to False.
+        Returns:
+            (a list of/batched) text/chat completion
+        """
+        batched = True
+        if isinstance(inputs, str):
+            batched = False
+            inputs = [inputs]
+        prompts = inputs
+        messages = [{"role": "user", "content": prompt} for prompt in prompts]
+        # NOTE: sampling parameters from update_gen_params are not yet
+        # forwarded to litellm in non-stream mode.
+        gen_params = self.update_gen_params(**kwargs)
+        response = completion(model=self.model_name, messages=messages)
+        response = [resp.message.content for resp in response.choices]
+        # remove stop_words
+        response = filter_suffix(response, self.gen_params.get('stop_words'))
+        if batched:
+            return response
+        return response[0]
+
+    def stream_chat(self,
+                    inputs: List[dict],
+                    stream: bool = True,
+                    ignore_eos: bool = False,
+                    skip_special_tokens: Optional[bool] = False,
+                    timeout: int = 30,
+                    **kwargs):
+        """Start a new round of conversation and return the chat
+        completions in stream mode.
+
+        Args:
+            inputs (List[dict]): user's inputs in this round of conversation
+            stream (bool): return in a streaming format if enabled
+            ignore_eos (bool): indicator for ignoring eos
+            skip_special_tokens (bool): whether to remove special tokens
+                in the decoding. Defaults to False.
+            timeout (int): max time to wait for response
+        Yields:
+            tuple(Status, str, int): status, text/chat completion,
+                generated token number
+        """
+        gen_params = self.update_gen_params(**kwargs)
+        # litellm expects `max_tokens` rather than lagent's `max_new_tokens`.
+        max_new_tokens = gen_params.pop('max_new_tokens')
+        gen_params.update(max_tokens=max_new_tokens)
+
+        resp = ''
+        finished = False
+        stop_words = gen_params.get('stop_words')
+        if stop_words is None:
+            stop_words = []
+        messages = self.template_parser._prompt2api(inputs)
+
+        for text in completion(
+                self.model_name,
+                messages,
+                stream=stream,
+                **gen_params):
+            if not text.choices[0].delta.content:
+                continue
+            resp += text.choices[0].delta.content
+            if not resp:
+                continue
+            # remove stop_words
+            for sw in stop_words:
+                if sw in resp:
+                    resp = filter_suffix(resp, stop_words)
+                    finished = True
+                    break
+            yield ModelStatusCode.STREAM_ING, resp, None
+            if finished:
+                break
+        yield ModelStatusCode.END, resp, None
+
+
+litellm_completion = dict(type=litellmCompletion,
+                          # model_name="deepseek/deepseek-chat",
+                          meta_template=[
+                              dict(role='system', api_role='system'),
+                              dict(role='user', api_role='user'),
+                              dict(role='assistant', api_role='assistant'),
+                              dict(role='environment', api_role='system')
+                          ]
+                          )
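
For reference, a minimal smoke test of the new wrapper, assuming the API key for the chosen provider is already exported in the environment (litellm resolves keys such as DEEPSEEK_API_KEY or COHERE_API_KEY on its own); the model id is illustrative, and the construction mirrors the `llm_cfg.pop('type')(**llm_cfg)` pattern that `init_agent` uses:

```python
# Minimal smoke test of litellmCompletion, assuming the provider API key
# (e.g. DEEPSEEK_API_KEY) is set; 'deepseek/deepseek-chat' is illustrative.
from mindsearch.agent.models import litellm_completion

cfg = litellm_completion.copy()
llm = cfg.pop('type')(model_name='deepseek/deepseek-chat', **cfg)

# Non-stream: a single string in yields a single completion back.
print(llm.generate('Say hello in one short sentence.'))

# Stream: consume incremental states until ModelStatusCode.END.
for status, text, _ in llm.stream_chat(
        [dict(role='user', content='Count from 1 to 5.')]):
    print(status, text[-20:])
```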
mindsearch/app.py CHANGED
@@ -24,9 +24,11 @@ def parse_arguments():
                         type=str,
                         help='Model format')
     parser.add_argument('--search_engine',
                         default='DuckDuckGoSearch',
                         type=str,
                         help='Search engine')
+    parser.add_argument('--model_name', default='deepseek/deepseek-chat',
+                        type=str, help='litellm model name')
     return parser.parse_args()
 
 
@@ -127,7 +129,14 @@ async def run(request: GenerationParams):
     await queue.wait_closed()
 
     inputs = request.inputs
-    agent = init_agent(lang=args.lang, model_format=args.model_format,search_engine=args.search_engine)
+    if args.model_format == 'litellm_completion':
+        agent = init_agent(lang=args.lang, model_format=args.model_format,
+                           search_engine=args.search_engine,
+                           model_name=args.model_name)
+    else:
+        agent = init_agent(
+            lang=args.lang, model_format=args.model_format,
+            search_engine=args.search_engine)
     return EventSourceResponse(generate())
 
 
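With the new flag, launching the server against a litellm backend would presumably look like this (the model id is illustrative; every other `model_format` keeps its old code path, where `--model_name` is simply ignored):

```
python -m mindsearch.app --lang en --model_format litellm_completion \
    --model_name deepseek/deepseek-chat --search_engine DuckDuckGoSearch
```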
requirements.txt CHANGED
@@ -1,12 +1,218 @@
+accelerate==0.33.0
+addict==2.4.0
+aiofiles==23.2.1
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+altair==5.4.1
+annotated-types==0.7.0
+anyio==4.4.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+arxiv==2.1.3
+asttokens==2.4.1
+async-lru==2.0.4
+attrs==24.2.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.8.2
+Brotli==1.1.0
+cachetools==5.5.0
+certifi==2024.8.30
+cffi==1.17.0
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.2
+contourpy==1.3.0
+cycler==0.12.1
+debugpy==1.8.5
+decorator==5.1.1
+defusedxml==0.7.1
+distro==1.9.0
 duckduckgo_search==5.3.1b1
-einops
-fastapi
-git+https://github.com/InternLM/lagent.git
-gradio
-janus
-lmdeploy
-pyvis
-sse-starlette
-termcolor
+einops==0.8.0
+executing==2.0.1
+fastapi==0.112.2
+fastjsonschema==2.20.0
+feedparser==6.0.11
+ffmpy==0.4.0
+filelock==3.15.4
+fire==0.6.0
+fonttools==4.53.1
+fqdn==1.5.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+func_timeout==4.3.5
+gitdb==4.0.11
+GitPython==3.1.43
+gradio==4.42.0
+gradio_client==1.3.0
+griffe==1.2.0
+h11==0.14.0
+h2==4.1.0
+hpack==4.0.0
+httpcore==1.0.5
+httpx==0.27.2
+huggingface-hub==0.24.6
+hyperframe==6.0.1
+idna==3.8
+importlib_metadata==8.4.0
+importlib_resources==6.4.4
+ipykernel==6.29.5
+ipython==8.27.0
+ipywidgets==8.1.5
+isoduration==20.11.0
+janus==1.0.0
+jedi==0.19.1
+Jinja2==3.1.4
+jiter==0.5.0
+json5==0.9.25
+jsonpickle==3.2.2
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+jupyter==1.1.1
+jupyter-console==6.6.3
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.5
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+jupyterlab_widgets==3.0.13
+kiwisolver==1.4.5
+-e git+https://github.com/InternLM/lagent.git@906845f1af47fcb7d81c5c32ec44b0cc22204f8a#egg=lagent
+litellm==1.44.13
+lmdeploy==0.5.3
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+mmengine-lite==0.10.4
+mpmath==1.3.0
+multidict==6.0.5
+narwhals==1.6.0
+nbclient==0.10.0
+nbconvert==7.16.4
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.3
+notebook==7.2.2
+notebook_shim==0.2.4
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.68
+nvidia-nvtx-cu12==12.1.105
+openai==1.43.0
+orjson==3.10.7
+overrides==7.7.0
+packaging==24.1
+pandas==2.2.2
+pandocfilters==1.5.1
+parso==0.8.4
+peft==0.11.1
+pexpect==4.9.0
+phx-class-registry==4.1.0
+pillow==10.4.0
+platformdirs==4.2.2
+prometheus_client==0.20.0
+prompt_toolkit==3.0.47
+protobuf==5.28.0
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==17.0.0
+pycparser==2.22
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydeck==0.9.1
+pydub==0.25.1
+Pygments==2.18.0
+pynvml==11.5.3
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-json-logger==2.0.7
+python-multipart==0.0.9
+pytz==2024.1
+pyvis==0.3.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+referencing==0.35.1
+regex==2024.7.24
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.8.0
+rpds-py==0.20.0
+ruff==0.6.3
+safetensors==0.4.4
+semantic-version==2.10.0
+Send2Trash==1.8.3
+sentencepiece==0.2.0
+setuptools==72.2.0
+sgmllib3k==1.0.0
+shellingham==1.5.4
+shortuuid==1.0.13
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+socksio==1.0.0
+soupsieve==2.6
+sse-starlette==2.1.3
+stack-data==0.6.3
+starlette==0.38.2
+streamlit==1.38.0
+sympy==1.13.2
+tenacity==8.5.0
+termcolor==2.4.0
+terminado==0.18.1
+tiktoken==0.7.0
+timeout-decorator==0.5.0
+tinycss2==1.3.0
+tokenizers==0.19.1
+toml==0.10.2
+tomli==2.0.1
+tomlkit==0.12.0
+torch==2.3.1
+torchvision==0.18.1
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
 transformers==4.41.0
-uvicorn
+triton==2.3.1
+typer==0.12.5
+types-python-dateutil==2.9.0.20240821
+typing_extensions==4.12.2
+tzdata==2024.1
+uri-template==1.3.0
+urllib3==2.2.2
+uvicorn==0.30.6
+watchdog==4.0.2
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+websockets==12.0
+wheel==0.44.0
+widgetsnbextension==4.0.13
+yapf==0.40.2
+yarl==1.9.6
+zipp==3.20.1