"""Streamlit test bench for a Llama text-generation Inference Endpoint.

The script first streams a fixed "discharge plan" prompt through
``InferenceClient`` and renders the growing answer in a placeholder, then
shows a small form whose buttons send prompt variations to the same endpoint
with a plain HTTP POST.
"""

import os

import requests
import streamlit as st
from huggingface_hub import InferenceClient

API_URL = 'https://qe55p8afio98s0u3.us-east-1.aws.endpoints.huggingface.cloud'
API_KEY = os.getenv('API_KEY')

# Upper bound for the blocking POST in query(); without a timeout a stalled
# endpoint would hang the Streamlit session indefinitely.
REQUEST_TIMEOUT_SECONDS = 120

headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
}

client = InferenceClient(API_URL, token=API_KEY)

gen_kwargs = dict(
    max_new_tokens=512,
    top_k=30,
    top_p=0.9,
    temperature=0.2,
    repetition_penalty=1.02,
    # The last entry used to be an empty string, which is not a usable stop
    # marker (and made the client-side check below break on any empty token
    # text). NOTE(review): restored as the Llama end-of-sequence marker;
    # confirm this was the intended value.
    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
)

# Shared by the streaming demo and by "Variation 1" in main().
DISCHARGE_PLAN_PROMPT = (
    "Write instructions to teach anyone to write a discharge plan. "
    "List the entities, features and relationships to CCDA and FHIR objects "
    "in boldface."
)
SUMMARY_PROMPT = (
    "Provide a summary of the medical transcription. "
    "Highlight the important entities, features, and relationships to CCDA "
    "and FHIR objects."
)


def stream_report(prompt):
    """Stream a completion for *prompt* and render it incrementally.

    Special tokens are skipped, and streaming stops as soon as a token's text
    equals one of the configured stop sequences. After every non-empty token
    the accumulated text is re-rendered (in italics) into a single Streamlit
    placeholder.

    Args:
        prompt: Text sent to the endpoint.

    Returns:
        The accumulated, stripped text.
    """
    stream = client.text_generation(
        prompt, stream=True, details=True, **gen_kwargs
    )
    res_box = st.empty()
    stop_sequences = gen_kwargs["stop_sequences"]
    report = []

    for r in stream:
        if r.token.special:
            continue
        token_text = r.token.text
        if token_text in stop_sequences:
            break
        # The original appended an undefined name here; the resulting
        # NameError was swallowed by a bare ``except`` and nothing was shown.
        report.append(token_text)
        if token_text:
            result = "".join(report).strip()
            res_box.markdown(f'*{result}*')

    return "".join(report).strip()


# Kept at module level so the streamed demo renders above the form, as in the
# original script. Note that Streamlit re-executes the script on every
# interaction, so this request is repeated on each rerun.
stream_report(DISCHARGE_PLAN_PROMPT)


def query(payload):
    """POST *payload* as JSON to the endpoint, echo the reply, and return it.

    Args:
        payload: JSON-serialisable request body.

    Returns:
        The decoded JSON body of the response.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    result = response.json()  # decode once instead of twice
    st.markdown(str(result))
    return result


def get_output(prompt):
    """Send *prompt* to the endpoint as ``{"inputs": prompt}``."""
    return query({"inputs": prompt})


def _show_output(prompt):
    """Run *prompt* through the endpoint and display the labelled result."""
    output = get_output(prompt)
    st.markdown(f"**Output:** {output}")


def main():
    """Render the form: one text input and two prompt-variation buttons."""
    st.title("Medical Llama Test Bench with Inference Endpoints Llama 7B")
    example_input = st.text_input("Enter your example text:")

    # Both buttons are always rendered; each one fires independently.
    if st.button("Summarize with Variation 1"):
        _show_output(f"{DISCHARGE_PLAN_PROMPT} \n{example_input}")

    if st.button("Summarize with Variation 2"):
        _show_output(f"{SUMMARY_PROMPT} {example_input}")


if __name__ == "__main__":
    main()