hf-llm-api

Running

Hansimov committed on Jan 28

Commit

403b8cf

•

1 Parent(s): e2b245b

:gem: [Feature] Add top_p and set 0.95 as default

Files changed (2) hide show

apis/chat_api.py CHANGED Viewed

@@ -91,6 +91,10 @@ class ChatAPIApp:
             default=0.5,
             description="(float) Temperature",
         )
         max_tokens: Union[int, None] = Field(
             default=-1,
             description="(int) Max tokens",
@@ -115,6 +119,7 @@ class ChatAPIApp:
         stream_response = streamer.chat_response(
             prompt=composer.merged_str,
             temperature=item.temperature,
             max_new_tokens=item.max_tokens,
             api_key=api_key,
             use_cache=item.use_cache,

             default=0.5,
             description="(float) Temperature",
         )
+        top_p: Union[float, None] = Field(
+            default=0.95,
+            description="(float) top p",
+        )
         max_tokens: Union[int, None] = Field(
             default=-1,
             description="(int) Max tokens",
         stream_response = streamer.chat_response(
             prompt=composer.merged_str,
             temperature=item.temperature,
+            top_p=item.top_p,
             max_new_tokens=item.max_tokens,
             api_key=api_key,
             use_cache=item.use_cache,

networks/message_streamer.py CHANGED Viewed

@@ -62,6 +62,7 @@ class MessageStreamer:
         self,
         prompt: str = None,
         temperature: float = 0.5,
         max_new_tokens: int = None,
         api_key: str = None,
         use_cache: bool = False,
@@ -111,6 +112,7 @@ class MessageStreamer:
             "inputs": prompt,
             "parameters": {
                 "temperature": temperature,
                 "max_new_tokens": max_new_tokens,
                 "return_full_text": False,
             },

         self,
         prompt: str = None,
         temperature: float = 0.5,
+        top_p: float = 0.95,
         max_new_tokens: int = None,
         api_key: str = None,
         use_cache: bool = False,
             "inputs": prompt,
             "parameters": {
                 "temperature": temperature,
+                "top_p": top_p,
                 "max_new_tokens": max_new_tokens,
                 "return_full_text": False,
             },