# SambanNova Langchain Wrappers Usage

In [2]:
import os

from dotenv import load_dotenv
from langchain_embeddings import SambaStudioEmbeddings
from langchain_llms import SambaStudio, SambaNovaCloud
from langchain_chat_models import ChatSambaNovaCloud
from langchain_core.messages import SystemMessage, HumanMessage

current_dir = os.getcwd()
utils_dir = os.path.abspath(os.path.join(current_dir, '..'))
repo_dir = os.path.abspath(os.path.join(utils_dir, '..'))

load_dotenv(os.path.join(repo_dir, '.env'), override=True)

True

# SambaStudio LLM

## Non streaming

In [9]:
llm = SambaStudio(
 streaming=False,
 # base_uri="api/predict/generic",
 model_kwargs={
 'do_sample': False,
 'temperature': 0.01,
 'max_tokens_to_generate': 256,
 'process_prompt': False,
 'select_expert': 'Meta-Llama-3-70B-Instruct-4096',
 },
)

In [11]:
llm.invoke('tell me a 50 word tale')

' of a brave knight\nSir Valoric, the fearless knight, charged into the dark forest, his armor shining like the sun. He battled the dragon, its fiery breath singeing his beard, but he stood tall, his sword flashing in the moonlight, until the beast lay defeated at his feet, its treasure his noble reward.'

## Streaming

In [None]:
llm = SambaStudio(
 streaming=True,
 model_kwargs={
 'do_sample': False,
 'max_tokens_to_generate': 256,
 'temperature': 0.01,
 'process_prompt': False,
 'select_expert': 'Meta-Llama-3-70B-Instruct-4096',
 },
)

In [None]:
for chunk in llm.stream('tell me a 50 word tale'):
 print(chunk, end='', flush=True)

 of a character who is a master of disguise

Sure! Here is a 50-word tale of a character who is a master of disguise:

"Araxys, the skilled disguise artist, transformed into a stunning mermaid to infiltrate a pirate's lair. With a flick of her tail, she charmed the pirates and stole their treasure."

# SambaNovaCloud LLM

## Non Streaming

In [4]:
llm = SambaNovaCloud(model='llama3-70b')

In [5]:
import json

llm.invoke(json.dumps([{'role': 'user', 'content': 'hello'}]))

'Hello. How can I assist you today?'

In [6]:
llm.invoke('hello')

'Hello. How can I assist you today?'

## Streaming

In [7]:
for i in llm.stream('hello tell me a long story'):
 print(i)


Here's a long story 
for you:

Once upon 
a time, in a small village 
nestled in the rolling hills of 
rural France, there lived a 
young girl named Sophie. Sophie 
was a curious and adventurous 
child, with a mop of curly 
brown hair and a smile that 
could light up the darkest 
of rooms. She lived with 
her parents, Pierre and 
Colette, in a small stone cottage 
on the outskirts of 
the village.

Sophie's village was 
a charming 
place, filled with narrow 
cobblestone streets, quaint shops, 
and 
bustling cafes. The villagers 
were a tight-knit 
community, and everyone knew each 
other's names and stories. Sophie 
loved listening to the villagers' 
tales of 
old, which 
often featured brave knights, 
beautiful princesses, and 
magical creatures.

One day, while exploring 
the village, Sophie stumbled upon 
a small, mysterious shop tucked 
away on a quiet street. 
The sign above the door 
read "Curios 
and Wonders," and the 
windows were filled 
with a dazzling array of strange 
and 

# SambaNova Cloud Chat Model

## Non Streaming

In [4]:
llm = ChatSambaNovaCloud(
 model= "llama3-405b",
 max_tokens=1024,
 temperature=0.7,
 top_k=1,
 top_p=0.01,
 stream_options={'include_usage':True}
 )

In [5]:
llm.invoke("tell me a joke")

AIMessage(content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?"\n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."', response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 6.875, 'completion_tokens': 54, 'completion_tokens_after_first_per_sec': 146.48573712341215, 'completion_tokens_after_first_per_sec_first_ten': 172.9005798161617, 'completion_tokens_per_sec': 81.99632208428116, 'end_time': 1726178488.071125, 'is_last_response': True, 'prompt_tokens': 40, 'start_time': 1726178487.3630672, 'time_to_first_token': 0.34624791145324707, 'total_latency': 0.658566123789007, 'total_tokens': 94, 'total_tokens_per_sec': 142.73433844300794}, 'model_name': 'Meta-Llama-3.1-405B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1726178487}, id='a5590b89-4853-4bd9-9fd8-83276b369278')

In [7]:
messages = [
 SystemMessage(content="You are a helpful assistant with pirate accent"),
 HumanMessage(content="tell me a joke")
 ]
llm.invoke(messages)

AIMessage(content="Yer lookin' fer a joke, eh? Alright then, matey! Here be one fer ye:\n\nWhy did the pirate quit his job?\n\n(pause fer dramatic effect)\n\nBecause he was sick o' all the arrrr-guments!\n\nYarrr, hope that made ye laugh, me hearty!", response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 5.583333333333333, 'completion_tokens': 64, 'completion_tokens_after_first_per_sec': 120.91573778458478, 'completion_tokens_after_first_per_sec_first_ten': 140.3985499426452, 'completion_tokens_per_sec': 79.98855768735817, 'end_time': 1726065701.9732044, 'is_last_response': True, 'prompt_tokens': 48, 'start_time': 1726065701.107911, 'time_to_first_token': 0.3442692756652832, 'total_latency': 0.8001144394945743, 'total_tokens': 112, 'total_tokens_per_sec': 139.9799759528768}, 'model_name': 'Meta-Llama-3.1-405B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1726065701}, id='7b0748bb-c5f7-4696-ae56-03b734b60fb9')

In [8]:
future_response = llm.ainvoke("tell me a joke")
await(future_response) 

AIMessage(content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?"\n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."', response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 6.875, 'completion_tokens': 54, 'completion_tokens_after_first_per_sec': 146.72813415408498, 'completion_tokens_after_first_per_sec_first_ten': 172.71830994351703, 'completion_tokens_per_sec': 82.34884281970663, 'end_time': 1726065746.6364844, 'is_last_response': True, 'prompt_tokens': 40, 'start_time': 1726065745.932173, 'time_to_first_token': 0.34309911727905273, 'total_latency': 0.6557469194585627, 'total_tokens': 94, 'total_tokens_per_sec': 143.34798564911895}, 'model_name': 'Meta-Llama-3.1-405B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1726065745}, id='27e7d4fe-8e24-419a-b75b-51ea2519781b')

## Batching

In [9]:
llm = ChatSambaNovaCloud(
 model= "llama3-405b",
 streaming=False,
 max_tokens=1024,
 temperature=0.7,
 top_k=1,
 top_p=0.01,
 stream_options={'include_usage':True}
 )

In [11]:
llm.batch(["tell me a joke","which is the capital of UK?"])

[AIMessage(content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?"\n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."', response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 6.875, 'completion_tokens': 54, 'completion_tokens_after_first_per_sec': 146.72232349940003, 'completion_tokens_after_first_per_sec_first_ten': 173.01988455676758, 'completion_tokens_per_sec': 82.21649876350362, 'end_time': 1726065879.4066722, 'is_last_response': True, 'prompt_tokens': 40, 'start_time': 1726065878.700746, 'time_to_first_token': 0.3446996212005615, 'total_latency': 0.656802476536144, 'total_tokens': 94, 'total_tokens_per_sec': 143.1176089586915}, 'model_name': 'Meta-Llama-3.1-405B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1726065878}, id='28d3a38b-5dae-4d62-bf6c-cface081df34'),
 AIMessage(content='The capital of the United Kingdom is London.', response_metadata={'

In [13]:
future_responses = llm.abatch(["tell me a joke","which is the capital of UK?"])
await(future_responses)

 future_responses = llm.abatch(["tell me a joke","which is the capital of UK?"])


[AIMessage(content='A man walked into a library and asked the librarian, "Do you have any books on Pavlov\'s dogs and Schrödinger\'s cat?"\n\nThe librarian replied, "It rings a bell, but I\'m not sure if it\'s here or not."', response_metadata={'finish_reason': 'stop', 'usage': {'acceptance_rate': 6.875, 'completion_tokens': 54, 'completion_tokens_after_first_per_sec': 120.34699641554552, 'completion_tokens_after_first_per_sec_first_ten': 141.51170437257693, 'completion_tokens_per_sec': 36.223157123884754, 'end_time': 1726065914.8678048, 'is_last_response': True, 'prompt_tokens': 40, 'start_time': 1726065913.3182464, 'time_to_first_token': 1.1091651916503906, 'total_latency': 1.4907590692693538, 'total_tokens': 94, 'total_tokens_per_sec': 63.05512536379939}, 'model_name': 'Meta-Llama-3.1-405B-Instruct', 'system_fingerprint': 'fastcoe', 'created': 1726065913}, id='f279d0fb-70b5-428c-9283-457b9831b559'),
 AIMessage(content='The capital of the United Kingdom is London.', response_metadata

## Streaming

In [14]:
llm = ChatSambaNovaCloud(
 model= "llama3-405b",
 streaming=True,
 max_tokens=1024,
 temperature=0.7,
 top_k=1,
 top_p=0.01,
 stream_options={'include_usage':True}
 )

In [15]:
for chunk in llm.stream("tell me a joke"):
 print(chunk.content)


A man walked into a 
library and asked the 
librarian, "Do you have any books 
on Pavlov's dogs 
and Schrödinger's cat?"


The librarian 
replied, "It rings a bell, 
but I'm not sure 
if it's here 
or not."





In [16]:
messages = [
 SystemMessage(content="You are a helpful assistant with pirate accent"),
 HumanMessage(content="tell me a joke")
 ]
for chunk in llm.stream(messages):
 print(chunk.content)


Yer lookin' 
fer a joke, eh? 
Alright then, matey! 
Here be one fer 
ye:

Why did the pirate quit his job?



(pause fer 
dramatic effect)

Because he was sick 
o' all the arrrr-guments!




Yarrr, hope that made ye 
laugh, 
me hearty!





In [17]:
async for chunk in llm.astream("tell me a joke"):
 print(chunk.content)


A man walked into a 
library and asked the 
librarian, "Do you have any books 
on Pavlov's dogs 
and Schrödinger's cat?"


The librarian 
replied, "It rings a bell, 
but I'm not sure 
if it's here 
or not."





# Sambastudio Embeddings

In [None]:
embedding = SambaStudioEmbeddings(batch_size=1, model_kwargs={'select_expert': 'e5-mistral-7b-instruct'})
embedding.embed_documents(['tell me a 50 word tale', 'tell me a joke'])
embedding.embed_query('tell me a 50 word tale')

In [13]:
from langchain.schema import Document
from langchain.vectorstores import Chroma

docs = [
 'tell me a 50 word tale',
 'tell me a joke',
 'when was America discoverd?',
 'how to build an engine?',
 'give me 3 party activities',
 'give me three healty dishes',
]
docs = [Document(doc) for doc in docs]

query = 'prompt for generating something fun'

vectordb = Chroma.from_documents(docs, embedding)
retriever = vectordb.as_retriever()

retriever.get_relevant_documents(query)

 warn_deprecated(


[Document(page_content='tell me a 50 word tale'),
 Document(page_content='tell me a joke'),
 Document(page_content='give me 3 party activities'),
 Document(page_content='give me three healty dishes')]