Spaces:

barunsaha
/

slide-deck-ai

Running

App Files Community

barunsaha committed 26 days ago

Commit

813ce6e

•

1 Parent(s): c4c876e

Allow users to choose from two different Mistral models

Browse files

Files changed (3) hide show

app.py +22 -11
global_config.py +10 -2
helpers/llm_helper.py +70 -69

app.py CHANGED Viewed

@@ -20,7 +20,6 @@ from langchain_core.prompts import ChatPromptTemplate
 sys.path.append('..')
 sys.path.append('../..')
-import helpers.icons_embeddings as ice
 from global_config import GlobalConfig
 from helpers import llm_helper, pptx_helper, text_helper
@@ -56,14 +55,16 @@ def _get_prompt_template(is_refinement: bool) -> str:
 @st.cache_resource
-def _get_llm():
     """
     Get an LLM instance.
     :return: The LLM.
     """
-    return llm_helper.get_hf_endpoint()
 APP_TEXT = _load_strings()
@@ -78,12 +79,19 @@ logger = logging.getLogger(__name__)
 texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
 captions = [GlobalConfig.PPTX_TEMPLATE_FILES[x]['caption'] for x in texts]
-pptx_template = st.sidebar.radio(
-    'Select a presentation template:',
-    texts,
-    captions=captions,
-    horizontal=True
-)
 def build_ui():
@@ -187,12 +195,15 @@ def set_up_chat_ui():
         response = ''
         try:
-            for chunk in _get_llm().stream(formatted_template):
                 response += chunk
                 # Update the progress bar
                 progress_percentage = min(
-                    len(response) / GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH, 0.95
                 )
                 progress_bar.progress(
                     progress_percentage,

 sys.path.append('..')
 sys.path.append('../..')
 from global_config import GlobalConfig
 from helpers import llm_helper, pptx_helper, text_helper
 @st.cache_resource
+def _get_llm(repo_id: str, max_new_tokens: int):
     """
     Get an LLM instance.
+    :param repo_id: The model name.
+    :param max_new_tokens: The max new tokens to generate.
     :return: The LLM.
     """
+    return llm_helper.get_hf_endpoint(repo_id, max_new_tokens)
 APP_TEXT = _load_strings()
 texts = list(GlobalConfig.PPTX_TEMPLATE_FILES.keys())
 captions = [GlobalConfig.PPTX_TEMPLATE_FILES[x]['caption'] for x in texts]
+with st.sidebar:
+    pptx_template = st.sidebar.radio(
+        'Select a presentation template:',
+        texts,
+        captions=captions,
+        horizontal=True
+    )
+    st.divider()
+    llm_to_use = st.sidebar.selectbox(
+        'Select an LLM to use:',
+        [f'{k} ({v["description"]})' for k, v in GlobalConfig.HF_MODELS.items()]
+    ).split(' ')[0]
 def build_ui():
         response = ''
         try:
+            for chunk in _get_llm(
+                    repo_id=llm_to_use,
+                    max_new_tokens=GlobalConfig.HF_MODELS[llm_to_use]['max_new_tokens']
+            ).stream(formatted_template):
                 response += chunk
                 # Update the progress bar
                 progress_percentage = min(
+                    len(response) / GlobalConfig.HF_MODELS[llm_to_use]['max_new_tokens'], 0.95
                 )
                 progress_bar.progress(
                     progress_percentage,

global_config.py CHANGED Viewed

@@ -17,10 +17,18 @@ class GlobalConfig:
     A data class holding the configurations.
     """
-    HF_LLM_MODEL_NAME = 'mistralai/Mistral-Nemo-Instruct-2407'
     LLM_MODEL_TEMPERATURE = 0.2
     LLM_MODEL_MIN_OUTPUT_LENGTH = 100
-    LLM_MODEL_MAX_OUTPUT_LENGTH = 4 * 4096  # tokens
     LLM_MODEL_MAX_INPUT_LENGTH = 400  # characters
     HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')

     A data class holding the configurations.
     """
+    HF_MODELS = {
+        'mistralai/Mistral-Nemo-Instruct-2407': {
+            'description': 'longer response',
+            'max_new_tokens': 12228
+        },
+        'mistralai/Mistral-7B-Instruct-v0.2': {
+            'description': 'faster, shorter',
+            'max_new_tokens': 8192
+        },
+    }
     LLM_MODEL_TEMPERATURE = 0.2
     LLM_MODEL_MIN_OUTPUT_LENGTH = 100
     LLM_MODEL_MAX_INPUT_LENGTH = 400  # characters
     HUGGINGFACEHUB_API_TOKEN = os.environ.get('HUGGINGFACEHUB_API_TOKEN', '')

helpers/llm_helper.py CHANGED Viewed

@@ -9,7 +9,6 @@ from langchain_core.language_models import LLM
 from global_config import GlobalConfig
-HF_API_URL = f"https://api-inference.huggingface.co/models/{GlobalConfig.HF_LLM_MODEL_NAME}"
 HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
 REQUEST_TIMEOUT = 35
@@ -28,18 +27,20 @@ http_session.mount('https://', adapter)
 http_session.mount('http://', adapter)
-def get_hf_endpoint() -> LLM:
     """
     Get an LLM via the HuggingFaceEndpoint of LangChain.
-    :return: The LLM.
     """
-    logger.debug('Getting LLM via HF endpoint')
     return HuggingFaceEndpoint(
-        repo_id=GlobalConfig.HF_LLM_MODEL_NAME,
-        max_new_tokens=GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
         top_k=40,
         top_p=0.95,
         temperature=GlobalConfig.LLM_MODEL_TEMPERATURE,
@@ -51,69 +52,69 @@ def get_hf_endpoint() -> LLM:
     )
-def hf_api_query(payload: dict) -> dict:
-    """
-    Invoke HF inference end-point API.
-    :param payload: The prompt for the LLM and related parameters.
-    :return: The output from the LLM.
-    """
-    try:
-        response = http_session.post(
-            HF_API_URL,
-            headers=HF_API_HEADERS,
-            json=payload,
-            timeout=REQUEST_TIMEOUT
-        )
-        result = response.json()
-    except requests.exceptions.Timeout as te:
-        logger.error('*** Error: hf_api_query timeout! %s', str(te))
-        result = []
-    return result
-def generate_slides_content(topic: str) -> str:
-    """
-    Generate the outline/contents of slides for a presentation on a given topic.
-    :param topic: Topic on which slides are to be generated.
-    :return: The content in JSON format.
-    """
-    with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
-        template_txt = in_file.read().strip()
-        template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
-    output = hf_api_query({
-        'inputs': template_txt,
-        'parameters': {
-            'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
-            'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
-            'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
-            'max_new_tokens': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
-            'num_return_sequences': 1,
-            'return_full_text': False,
-            # "repetition_penalty": 0.0001
-        },
-        'options': {
-            'wait_for_model': True,
-            'use_cache': True
-        }
-    })
-    output = output[0]['generated_text'].strip()
-    # output = output[len(template_txt):]
-    json_end_idx = output.rfind('```')
-    if json_end_idx != -1:
-        # logging.debug(f'{json_end_idx=}')
-        output = output[:json_end_idx]
-    logger.debug('generate_slides_content: output: %s', output)
-    return output
 if __name__ == '__main__':

 from global_config import GlobalConfig
 HF_API_HEADERS = {"Authorization": f"Bearer {GlobalConfig.HUGGINGFACEHUB_API_TOKEN}"}
 REQUEST_TIMEOUT = 35
 http_session.mount('http://', adapter)
+def get_hf_endpoint(repo_id: str, max_new_tokens: int) -> LLM:
     """
     Get an LLM via the HuggingFaceEndpoint of LangChain.
+    :param repo_id: The model name.
+    :param max_new_tokens: The max new tokens to generate.
+    :return: The HF LLM inference endpoint.
     """
+    logger.debug('Getting LLM via HF endpoint: %s', repo_id)
     return HuggingFaceEndpoint(
+        repo_id=repo_id,
+        max_new_tokens=max_new_tokens,
         top_k=40,
         top_p=0.95,
         temperature=GlobalConfig.LLM_MODEL_TEMPERATURE,
     )
+# def hf_api_query(payload: dict) -> dict:
+#     """
+#     Invoke HF inference end-point API.
+#
+#     :param payload: The prompt for the LLM and related parameters.
+#     :return: The output from the LLM.
+#     """
+#
+#     try:
+#         response = http_session.post(
+#             HF_API_URL,
+#             headers=HF_API_HEADERS,
+#             json=payload,
+#             timeout=REQUEST_TIMEOUT
+#         )
+#         result = response.json()
+#     except requests.exceptions.Timeout as te:
+#         logger.error('*** Error: hf_api_query timeout! %s', str(te))
+#         result = []
+#
+#     return result
+# def generate_slides_content(topic: str) -> str:
+#     """
+#     Generate the outline/contents of slides for a presentation on a given topic.
+#
+#     :param topic: Topic on which slides are to be generated.
+#     :return: The content in JSON format.
+#     """
+#
+#     with open(GlobalConfig.SLIDES_TEMPLATE_FILE, 'r', encoding='utf-8') as in_file:
+#         template_txt = in_file.read().strip()
+#         template_txt = template_txt.replace('<REPLACE_PLACEHOLDER>', topic)
+#
+#     output = hf_api_query({
+#         'inputs': template_txt,
+#         'parameters': {
+#             'temperature': GlobalConfig.LLM_MODEL_TEMPERATURE,
+#             'min_length': GlobalConfig.LLM_MODEL_MIN_OUTPUT_LENGTH,
+#             'max_length': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+#             'max_new_tokens': GlobalConfig.LLM_MODEL_MAX_OUTPUT_LENGTH,
+#             'num_return_sequences': 1,
+#             'return_full_text': False,
+#             # "repetition_penalty": 0.0001
+#         },
+#         'options': {
+#             'wait_for_model': True,
+#             'use_cache': True
+#         }
+#     })
+#
+#     output = output[0]['generated_text'].strip()
+#     # output = output[len(template_txt):]
+#
+#     json_end_idx = output.rfind('```')
+#     if json_end_idx != -1:
+#         # logging.debug(f'{json_end_idx=}')
+#         output = output[:json_end_idx]
+#
+#     logger.debug('generate_slides_content: output: %s', output)
+#
+#     return output
 if __name__ == '__main__':