hf-llm-api

Running

App Files Files Community

jonathanjordan21 commited on Jul 16

Commit

ee910d2

•

1 Parent(s): 7b40e67

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +78 -4

apis/chat_api.py CHANGED Viewed

@@ -140,6 +140,80 @@ class ChatAPIApp:
             raise HTTPException(status_code=e.status_code, detail=e.detail)
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
     class EmbeddingRequest(BaseModel):
@@ -181,19 +255,19 @@ class ChatAPIApp:
             self.app.post(
                 prefix + "/chat/completions",
-                summary="Chat completions in conversation session",
                 include_in_schema=include_in_schema,
             )(self.chat_completions)
             self.app.post(
                 prefix + "/generate",
-                summary="Chat completions in conversation session",
                 include_in_schema=include_in_schema,
-            )(self.chat_completions)
             self.app.post(
                 prefix + "/chat",
-                summary="Chat completions in conversation session",
                 include_in_schema=include_in_schema,
             )(self.chat_completions)

             raise HTTPException(status_code=e.status_code, detail=e.detail)
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
+    class GenerateRequest(BaseModel):
+        model: str = Field(
+            default="nous-mixtral-8x7b",
+            description="(str) `nous-mixtral-8x7b`",
+        )
+        prompt: str = Field(
+            default="Hello, who are you?",
+            description="(list) Messages",
+        )
+        temperature: Union[float, None] = Field(
+            default=0.5,
+            description="(float) Temperature",
+        )
+        top_p: Union[float, None] = Field(
+            default=0.95,
+            description="(float) top p",
+        )
+        max_tokens: Union[int, None] = Field(
+            default=-1,
+            description="(int) Max tokens",
+        )
+        use_cache: bool = Field(
+            default=False,
+            description="(bool) Use cache",
+        )
+        stream: bool = Field(
+            default=True,
+            description="(bool) Stream",
+        )
+    def generate_text(
+        self, item: GenerateRequest, api_key: str = Depends(extract_api_key)
+    ):
+        try:
+            api_key = self.auth_api_key(api_key)
+            if item.model == "gpt-3.5-turbo":
+                streamer = OpenaiStreamer()
+                stream_response = streamer.chat_response(messages=[{"user":item.prompt}])
+            elif item.model in PRO_MODELS:
+                streamer = HuggingchatStreamer(model=item.model)
+                stream_response = streamer.chat_response(
+                    messages=[{"user":item.prompt}],
+                )
+            else:
+                streamer = HuggingfaceStreamer(model=item.model)
+                stream_response = streamer.chat_response(
+                    prompt=item.prompt,
+                    temperature=item.temperature,
+                    top_p=item.top_p,
+                    max_new_tokens=item.max_tokens,
+                    api_key=api_key,
+                    use_cache=item.use_cache,
+                )
+            if item.stream:
+                event_source_response = EventSourceResponse(
+                    streamer.chat_return_generator(stream_response),
+                    media_type="text/event-stream",
+                    ping=2000,
+                    ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
+                )
+                return event_source_response
+            else:
+                data_response = streamer.chat_return_dict(stream_response)
+                return data_response
+        except HfApiException as e:
+            raise HTTPException(status_code=e.status_code, detail=e.detail)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
     class EmbeddingRequest(BaseModel):
             self.app.post(
                 prefix + "/chat/completions",
+                summary="OpenAI Chat completions in conversation session",
                 include_in_schema=include_in_schema,
             )(self.chat_completions)
             self.app.post(
                 prefix + "/generate",
+                summary="Ollama text generation",
                 include_in_schema=include_in_schema,
+            )(self.generate_text)
             self.app.post(
                 prefix + "/chat",
+                summary="Ollama Chat completions in conversation session",
                 include_in_schema=include_in_schema,
             )(self.chat_completions)