Spaces:

vectara
/

leaderboard

Running on CPU Upgrade

App Files Files Community

Miaoran000 committed on Apr 19

Commit

2aa9a75

•

1 Parent(s): 6632750

minor fix

Browse files

Files changed (2) hide show

src/backend/model_operations.py +67 -14
src/backend/run_eval_suite.py +2 -1

src/backend/model_operations.py CHANGED Viewed

@@ -19,7 +19,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import cohere
 from openai import OpenAI
 import src.backend.util as util
 import src.envs as envs
@@ -131,6 +131,10 @@ class SummaryGenerator:
                             wait_time = 200
                             print(f"Model is loading, wait for {wait_time}")
                             time.sleep(wait_time)
                         else:
                             print(f"Error at index {index}: {e}")
                             _summary = ""
@@ -161,8 +165,16 @@ class SummaryGenerator:
     def generate_summary(self, system_prompt: str, user_prompt: str):
         # Using Together AI API
-        if 'mixtral' in self.model_id.lower() or 'dbrx' in self.model_id.lower() or 'wizardlm' in self.model_id.lower(): # For mixtral and dbrx models, use Together AI API
-            suffix = "completions" if ('mixtral' in self.model_id.lower() or 'base' in self.model_id.lower()) else "chat/completions"
             url = f"https://api.together.xyz/v1/{suffix}"
             payload = {
@@ -170,15 +182,17 @@ class SummaryGenerator:
                 # "max_tokens": 4096,
                 'max_new_tokens': 250,
                 "temperature": 0.0,
-                'repetition_penalty': 1.1 if 'mixtral' in self.model_id.lower() else 1
             }
-            if 'mixtral' in self.model_id.lower():
-                # payload['prompt'] = user_prompt
-                # payload['prompt'] = "Write a summary of the following passage:\nPassage:\n" + user_prompt.split('Passage:\n')[-1] + '\n\nSummary:'
-                payload['prompt'] = 'You must stick to the passage provided. Provide a concise summary of the following passage, covering the core pieces of information described:\nPassage:\n' + user_prompt.split('Passage:\n')[-1] + '\n\nSummary:'
-                print(payload)
-            else:
-                payload['messages'] = [{"role": "system", "content": system_prompt},
                                         {"role": "user", "content": user_prompt}]
             headers = {
                 "accept": "application/json",
@@ -216,8 +230,47 @@ class SummaryGenerator:
             print(result)
             return result
         # Using HF API or download checkpoints
-        if self.local_model is None:
             try: # try use HuggingFace API
                 response = litellm.completion(
@@ -229,6 +282,7 @@ class SummaryGenerator:
                     api_base=self.api_base,
                 )
                 result = response['choices'][0]['message']['content']
             except: # fail to call api. run it locally.
                 self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
                 print("Tokenizer loaded")
@@ -249,8 +303,7 @@ class SummaryGenerator:
             result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             result = result.replace(prompt[0], '')
             print(result)
-        return result
     def _compute_avg_length(self):
         """

 import torch
 import cohere
 from openai import OpenAI
+import google.generativeai as genai
 import src.backend.util as util
 import src.envs as envs
                             wait_time = 200
                             print(f"Model is loading, wait for {wait_time}")
                             time.sleep(wait_time)
+                        elif '429 Resource has been exhausted' in str(e): # for gemini models
+                            wait_time = 60
+                            print(f"Quota has reached, wait for {wait_time}")
+                            time.sleep(wait_time)
                         else:
                             print(f"Error at index {index}: {e}")
                             _summary = ""
     def generate_summary(self, system_prompt: str, user_prompt: str):
         # Using Together AI API
+        using_together_api = False
+        together_ai_api_models = ['mixtral', 'dbrx', 'wizardlm', 'llama-3']
+        for together_ai_api_model in together_ai_api_models:
+            if together_ai_api_model in self.model_id.lower():
+                using_together_api = True
+                break
+        # if 'mixtral' in self.model_id.lower() or 'dbrx' in self.model_id.lower() or 'wizardlm' in self.model_id.lower(): # For mixtral and dbrx models, use Together AI API
+        if using_together_api:
+            # suffix = "completions" if ('mixtral' in self.model_id.lower() or 'base' in self.model_id.lower()) else "chat/completions"
+            suffix = "chat/completions"
             url = f"https://api.together.xyz/v1/{suffix}"
             payload = {
                 # "max_tokens": 4096,
                 'max_new_tokens': 250,
                 "temperature": 0.0,
+                # 'repetition_penalty': 1.1 if 'mixtral' in self.model_id.lower() else 1
             }
+            # if 'mixtral' in self.model_id.lower():
+            #     # payload['prompt'] = user_prompt
+            #     # payload['prompt'] = "Write a summary of the following passage:\nPassage:\n" + user_prompt.split('Passage:\n')[-1] + '\n\nSummary:'
+            #     payload['prompt'] = 'You must stick to the passage provided. Provide a concise summary of the following passage, covering the core pieces of information described:\nPassage:\n' + user_prompt.split('Passage:\n')[-1] + '\n\nSummary:'
+            #     print(payload)
+            # else:
+            #     payload['messages'] = [{"role": "system", "content": system_prompt},
+            #                             {"role": "user", "content": user_prompt}]
+            payload['messages'] = [{"role": "system", "content": system_prompt},
                                         {"role": "user", "content": user_prompt}]
             headers = {
                 "accept": "application/json",
             print(result)
             return result
+        # Using Google AI API for Gemini models
+        elif 'gemini' in self.model_id.lower():
+            genai.configure(api_key=os.getenv('GOOGLE_AI_API_KEY'))
+            generation_config = {
+                "temperature": 0,
+                "top_p": 0.95, # cannot change
+                "top_k": 0,
+                "max_output_tokens": 250,
+                # "response_mime_type": "application/json",
+            }
+            safety_settings = [
+                {
+                    "category": "HARM_CATEGORY_HARASSMENT",
+                    "threshold": "BLOCK_NONE"
+                },
+                {
+                    "category": "HARM_CATEGORY_HATE_SPEECH",
+                    "threshold": "BLOCK_NONE"
+                },
+                {
+                    "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
+                    "threshold": "BLOCK_NONE"
+                },
+                {
+                    "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
+                    "threshold": "BLOCK_NONE"
+                },
+            ]
+            model = genai.GenerativeModel(model_name="gemini-1.5-pro-latest" if "gemini-1.5-pro" in self.model_id.lower() else self.model_id.lower().split('google/')[-1],
+                              generation_config=generation_config,
+                              system_instruction=system_prompt,
+                              safety_settings=safety_settings)
+            convo = model.start_chat(history=[])
+            convo.send_message(user_prompt)
+            # print(convo.last)
+            result = convo.last.text
+            print(result)
+            return result
         # Using HF API or download checkpoints
+        elif self.local_model is None:
             try: # try use HuggingFace API
                 response = litellm.completion(
                     api_base=self.api_base,
                 )
                 result = response['choices'][0]['message']['content']
+                return result
             except: # fail to call api. run it locally.
                 self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
                 print("Tokenizer loaded")
             result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             result = result.replace(prompt[0], '')
             print(result)
+            return result
     def _compute_avg_length(self):
         """

src/backend/run_eval_suite.py CHANGED Viewed

@@ -48,7 +48,8 @@ def run_evaluation(eval_request: EvalRequest, batch_size, device,
                             batch_size, device, no_cache, limit, write_out=True,
                             output_base_path='logs')
         results = evaluator.evaluate()
-        evaluator.write_results()
     except Exception as e:
         logging.error(f"Error during evaluation: {e}")
         raise

                             batch_size, device, no_cache, limit, write_out=True,
                             output_base_path='logs')
         results = evaluator.evaluate()
+        if write_results:
+            evaluator.write_results()
     except Exception as e:
         logging.error(f"Error during evaluation: {e}")
         raise