How to use Mistral with Node.js / transformers.js?
#52 by lancejpollard
Doing this:
import 'dotenv/config'
import fs from 'fs/promises'
import {
  env,
  pipeline,
  AutoTokenizer,
  AutoModelForCausalLM,
} from '@xenova/transformers'

// env.allowRemoteModels = false
// env.localModelPath = './import/language/tibetan/models'

// Python reference snippet (from the model card):
// # Use a pipeline as a high-level helper
// from transformers import pipeline
// pipe = pipeline("text-generation", model="meta-llama/Meta-Llama-3.1-8B")

async function summarizeDefinitions(definitions) {
  // Load the tokenizer
  const tokenizer = await AutoTokenizer.from_pretrained(
    'mistralai/Mistral-Nemo-Instruct-2407',
  )

  // Load the model
  const model = await AutoModelForCausalLM.from_pretrained(
    'mistralai/Mistral-Nemo-Instruct-2407',
  )

  const summarizer = await pipeline('text-generation', model, tokenizer)

  const cleanedDefinitions = {}

  let i = 0
  for (const term in definitions) {
    const defs = definitions[term]
    const combinedDefs = `Please summarize these definitions into a JSON array of simple ideally 1-3 word definitions: ${JSON.stringify(
      defs,
      null,
      2,
    )}`

    // Summarize the combined definitions
    const summary = await summarizer(combinedDefs, {
      max_length: 1000, // adjust length based on your requirements
      min_length: 1,
      do_sample: false,
    })

    console.log(summary)

    // Clean up the generated text to create 1-3 word definitions
    // (text-generation pipelines return generated_text)
    const cleaned = summary[0].generated_text
      .split('.')
      .map(s => s.trim())
      .filter(s => s.length > 0)
      .map(s =>
        s
          .split(',')
          .map(ss => ss.trim())
          // keep only short entries of at most three words
          .filter(ss => ss.length > 0 && ss.split(/\s+/).length <= 3),
      )

    cleanedDefinitions[term] = {
      definitions: cleaned.flat(),
      // type: 'noun', // or determine part-of-speech based on your logic
    }

    if (i === 100) {
      break
    }

    i++
  }

  return cleanedDefinitions
}

async function main() {
  const definitions = JSON.parse(
    await fs.readFile(
      `import/language/tibetan/definitions.out.json`,
      `utf-8`,
    ),
  )

  const cleanedDefinitions = await summarizeDefinitions(definitions)

  console.log(cleanedDefinitions)
}

main()
I get:
Error: Unauthorized access to file: "https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407/resolve/main/tokenizer_config.json".
What do I need to do to get this working?
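From what I can tell, the first error is an access problem rather than a code problem: mistralai/Mistral-Nemo-Instruct-2407 is a gated repo, so the Hub only serves its files to requests from an account that has accepted the license. The workaround I'm experimenting with, as a rough sketch, is to download the repo with an authenticated tool and then point transformers.js at the local copy via the env settings I already have commented out above (the HF_TOKEN variable, the huggingface-cli download step, and the ./models path are just my assumptions, adjust to your setup):

// 1. Download the gated files with an authenticated client, e.g. (assumed setup):
//    HF_TOKEN=hf_xxx huggingface-cli download mistralai/Mistral-Nemo-Instruct-2407 \
//      --local-dir ./models/mistralai/Mistral-Nemo-Instruct-2407
//
// 2. Tell transformers.js to resolve models from disk instead of the Hub:
import { env, AutoTokenizer } from '@xenova/transformers'

env.allowRemoteModels = false // don't fall back to huggingface.co
env.localModelPath = './models' // parent folder containing mistralai/Mistral-Nemo-Instruct-2407

// from_pretrained() should now look under ./models/mistralai/Mistral-Nemo-Instruct-2407/
const tokenizer = await AutoTokenizer.from_pretrained(
  'mistralai/Mistral-Nemo-Instruct-2407',
)

That only seems to address the authorization part for files that actually exist in the repo, though.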
If I add an access token, I now get this error instead:
Error: Could not locate file: "https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407/resolve/main/onnx/decoder_model_merged_quantized.onnx".
Any ideas?
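For what it's worth, my current understanding is that transformers.js runs models through ONNX Runtime, so from_pretrained() looks for ONNX exports under onnx/ in the repo (hence the decoder_model_merged_quantized.onnx path), and the official Mistral-Nemo-Instruct-2407 repo doesn't seem to publish any ONNX weights, only the original safetensors. It's also a roughly 12B-parameter model, which is probably impractical to run this way in Node anyway. Here is a rough sketch of the same flow that I'd expect to load, assuming a small instruct model that already ships ONNX exports and passing the repo id straight to pipeline() instead of pre-loading with the Auto classes (Xenova/TinyLlama-1.1B-Chat-v1.0 is just an example id, swap in whichever ONNX-exported model you prefer):

import { pipeline } from '@xenova/transformers'

// Example ONNX-exported chat model (assumption); pipeline() takes the repo id directly.
const generator = await pipeline(
  'text-generation',
  'Xenova/TinyLlama-1.1B-Chat-v1.0',
  { quantized: true }, // use the *_quantized.onnx weights
)

const output = await generator(
  'Please summarize these definitions into a JSON array of simple 1-3 word definitions: ["a large horned animal", "a cow"]',
  { max_new_tokens: 256, do_sample: false },
)

console.log(output[0].generated_text) // text-generation pipelines return generated_text

If it has to be Mistral NeMo itself, it looks like the model would first need its own ONNX export (the transformers.js project documents an Optimum-based conversion script), but at that size I suspect calling it through an API and keeping Node as the orchestrator is the more realistic route. Is that right, or is there a supported way to use the official repo directly?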