Kang Suhyun committed
Commit fd9a72d
Parent(s): 8d7d881

[#130] Update non-JSON response handling (#131)

* [#130] Update non-JSON response handling
* rename
- README.md +0 -1
- model.py +45 -36
- response.py +9 -1
README.md
CHANGED
@@ -56,7 +56,6 @@ Get Involved: [Discuss and contribute on GitHub](https://github.com/yanolja/aren
 ANTHROPIC_API_KEY=<your key> \
 MISTRAL_API_KEY=<your key> \
 GEMINI_API_KEY=<your key> \
-GROQ_API_KEY=<your key> \
 DEEPINFRA_API_KEY=<your key> \
 python3 app.py
 ```
model.py
CHANGED
@@ -4,7 +4,7 @@ This module contains functions to interact with the models.
 
 import json
 import os
-from typing import List
+from typing import List, Optional, Tuple
 
 import litellm
 
@@ -34,10 +34,12 @@ class Model:
     self.summarize_instruction = summarize_instruction or DEFAULT_SUMMARIZE_INSTRUCTION  # pylint: disable=line-too-long
     self.translate_instruction = translate_instruction or DEFAULT_TRANSLATE_INSTRUCTION  # pylint: disable=line-too-long
 
+  # Returns the parsed result or raw response, and whether parsing succeeded.
   def completion(self,
                  instruction: str,
                  prompt: str,
-                 max_tokens: float = None
+                 max_tokens: Optional[float] = None,
+                 max_retries: int = 2) -> Tuple[str, bool]:
     messages = [{
         "role":
             "system",
@@ -50,23 +52,25 @@ Output following this JSON format without using code blocks:
         "content": prompt
     }]
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    for attempt in range(max_retries + 1):
+      try:
+        response = litellm.completion(model=self.provider + "/" +
+                                      self.name if self.provider else self.name,
+                                      api_key=self.api_key,
+                                      api_base=self.api_base,
+                                      messages=messages,
+                                      max_tokens=max_tokens,
+                                      **self._get_completion_kwargs())
+
+        json_response = response.choices[0].message.content
+        parsed_json = json.loads(json_response)
+        return parsed_json["result"], True
+
+      except litellm.ContextWindowExceededError as e:
+        raise ContextWindowExceededError() from e
+      except json.JSONDecodeError:
+        if attempt == max_retries:
+          return json_response, False
 
   def _get_completion_kwargs(self):
     return {
@@ -82,7 +86,8 @@ class AnthropicModel(Model):
   def completion(self,
                  instruction: str,
                  prompt: str,
-                 max_tokens: float = None
+                 max_tokens: Optional[float] = None,
+                 max_retries: int = 2) -> Tuple[str, bool]:
     # Ref: https://docs.anthropic.com/en/docs/test-and-evaluate/strengthen-guardrails/increase-consistency#prefill-claudes-response # pylint: disable=line-too-long
     prefix = "<result>"
     suffix = "</result>"
@@ -99,23 +104,27 @@ Text:
         "role": "assistant",
         "content": prefix
     }]
-    try:
-      response = litellm.completion(
-          model=self.provider + "/" + self.name if self.provider else self.name,
-          api_key=self.api_key,
-          api_base=self.api_base,
-          messages=messages,
-          max_tokens=max_tokens,
-      )
 
-
-
+    for attempt in range(max_retries + 1):
+      try:
+        response = litellm.completion(
+            model=self.provider + "/" +
+            self.name if self.provider else self.name,
+            api_key=self.api_key,
+            api_base=self.api_base,
+            messages=messages,
+            max_tokens=max_tokens,
+        )
+
+      except litellm.ContextWindowExceededError as e:
+        raise ContextWindowExceededError() from e
 
-
-
-
+      result = response.choices[0].message.content
+      if result.endswith(suffix):
+        return result.removesuffix(suffix).strip(), True
 
-
+      if attempt == max_retries:
+        return result, False
 
 
 class VertexModel(Model):
@@ -164,8 +173,8 @@ supported_models: List[Model] = [
          vertex_credentials=os.getenv("VERTEX_CREDENTIALS")),
     Model("mistral-small-2402", provider="mistral"),
     Model("mistral-large-2402", provider="mistral"),
-    Model("
-    Model("
+    Model("meta-llama/Meta-Llama-3-8B-Instruct", provider="deepinfra"),
+    Model("meta-llama/Meta-Llama-3-70B-Instruct", provider="deepinfra"),
     Model("google/gemma-2-9b-it", provider="deepinfra"),
     Model("google/gemma-2-27b-it", provider="deepinfra"),
     EeveModel("yanolja/EEVE-Korean-Instruct-10.8B-v1.0",
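For readers skimming the diff, here is a minimal, self-contained sketch of the two behaviours this commit introduces in model.py: the base Model.completion now retries when the reply is not valid JSON and finally returns the raw text together with a False flag, while the AnthropicModel variant prefills the assistant turn with `<result>` and accepts the reply only if it ends with `</result>`. The helper names and the `generate` callable standing in for litellm.completion are illustrative assumptions, not code from the repository.

```python
import json
from typing import Callable, Tuple


def completion_with_json_fallback(generate: Callable[[], str],
                                  max_retries: int = 2) -> Tuple[str, bool]:
  """Hypothetical helper mirroring Model.completion's new retry flow.

  `generate` stands in for one litellm.completion call and returns raw text.
  A successful JSON parse returns the "result" field with True; after the
  last failed attempt, the raw reply is returned with False.
  """
  raw = ""
  for attempt in range(max_retries + 1):
    raw = generate()  # one model call per attempt
    try:
      return json.loads(raw)["result"], True
    except json.JSONDecodeError:
      if attempt == max_retries:
        return raw, False
  return raw, False  # not reached; keeps the return type total


def extract_prefilled_result(raw: str,
                             suffix: str = "</result>") -> Tuple[str, bool]:
  """Hypothetical helper mirroring AnthropicModel's suffix check."""
  if raw.endswith(suffix):
    return raw.removesuffix(suffix).strip(), True
  return raw, False
```

In the diff itself, the raw text is read from response.choices[0].message.content before parsing, and litellm.ContextWindowExceededError is re-raised as the project's ContextWindowExceededError rather than retried.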
response.py
CHANGED
@@ -22,6 +22,7 @@ logging.basicConfig()
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
+
 # TODO(#37): Move DB operations to db.py.
 def get_history_collection(category: str):
   if category == Category.SUMMARIZE.value:
@@ -89,14 +90,18 @@ def get_responses(prompt: str, category: str, source_lang: str,
 
   models: List[Model] = sample(list(supported_models), 2)
   responses = []
+  got_invalid_response = False
   for model in models:
     instruction = get_instruction(category, model, source_lang, target_lang)
     try:
       # TODO(#1): Allow user to set configuration.
-      response = model.completion(instruction, prompt)
+      response, is_valid_response = model.completion(instruction, prompt)
       create_history(category, model.name, instruction, prompt, response)
       responses.append(response)
 
+      if not is_valid_response:
+        got_invalid_response = True
+
     except ContextWindowExceededError as e:
       logger.exception("Context window exceeded for model %s.", model.name)
       raise gr.Error(
@@ -106,6 +111,9 @@ def get_responses(prompt: str, category: str, source_lang: str,
       logger.exception("Failed to get response from model %s.", model.name)
       raise gr.Error("Failed to get response. Please try again.") from e
 
+  if got_invalid_response:
+    gr.Warning("An invalid response was received.")
+
   model_names = [model.name for model in models]
 
   # It simulates concurrent stream response generation.
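On the caller side, get_responses in response.py now unpacks the (text, is_valid) tuple and raises a single Gradio warning if any model produced a non-parsable reply. A rough, dependency-free sketch of that handling follows; collect_responses, the completions callables, and the warn parameter are illustrative assumptions rather than project code.

```python
import warnings
from typing import Callable, List, Tuple


def collect_responses(
    completions: List[Callable[[], Tuple[str, bool]]],
    warn: Callable[[str], None] = lambda msg: warnings.warn(msg),
) -> List[str]:
  """Hypothetical caller-side handling of (text, is_valid) results.

  Each callable stands in for one model.completion(...) call. Invalid
  replies are still kept so they can be stored and displayed, but a single
  warning is emitted afterwards, mirroring the gr.Warning call in
  get_responses.
  """
  responses = []
  got_invalid_response = False
  for complete in completions:
    text, is_valid = complete()
    responses.append(text)  # raw text is still recorded and shown
    if not is_valid:
      got_invalid_response = True
  if got_invalid_response:
    warn("An invalid response was received.")
  return responses
```

As in the diff, the invalid text is still appended and written to history, so users see exactly what the model returned even when parsing failed.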