Mistral 7B Instruct v0.1 for Web-LLM q4f32_1
This is a compiled version of Mistral-7B-Instruct-v0.1 for MLC Web-LLM, using q4f32_1 quantization.
Usage
import * as webLLM from "@mlc-ai/web-llm";
// Identifier under which Web-LLM registers this model.
const modelId = "Mistral-7B-Instruct-v0.1-q4f32_1";

// Sampling configuration: greedy decoding (temperature 0, top_p 1)
// with a mild repetition penalty.
const chatConfig = {
  temperature: 0,
  repetition_penalty: 1.2,
  top_p: 1,
};

// App configuration: where to fetch the quantized weights (model_url)
// and the matching compiled WebGPU kernel library (model_lib_map).
const appConfig = {
  model_list: [
    {
      model_url:
        "https://huggingface.co/Felladrin/mlc-chat-Mistral-7B-Instruct-v0.1-q4f32_1/resolve/main/params/",
      local_id: modelId,
    },
  ],
  model_lib_map: {
    [modelId]:
      "https://huggingface.co/Felladrin/mlc-chat-Mistral-7B-Instruct-v0.1-q4f32_1/resolve/main/Mistral-7B-Instruct-v0.1-q4f32_1-webgpu.wasm",
  },
};
/**
 * Loads the model into a Web-LLM ChatModule and runs two example prompts,
 * streaming partial responses to the console.
 *
 * Relies on the module-level `modelId`, `chatConfig`, and `appConfig`.
 * @returns {Promise<void>}
 */
async function main() {
  const chat = new webLLM.ChatModule();
  await chat.reload(modelId, chatConfig, appConfig);
  let lastResponse = "";
  const generateProgressCallback = (_, message = "") => {
    // An empty partial message signals there is nothing left to stream,
    // so stop generation early.
    if (message.length === 0) return chat.interruptGenerate();
    lastResponse = message;
    console.log(`Partial response: ${lastResponse}`);
  };
  const firstPrompt = "Could answer some questions?"; // fixed typo: was `fistPrompt`
  await chat.generate(firstPrompt, generateProgressCallback);
  console.log(`Complete response: ${lastResponse}`);
  const secondPrompt = "What's Mistral?";
  await chat.generate(secondPrompt, generateProgressCallback);
  console.log(`Complete response: ${lastResponse}`);
  console.info(await chat.runtimeStatsText());
}
// Don't leave the promise floating: surface reload/generate failures
// instead of producing an unhandled rejection.
main().catch(console.error);
Inference Providers (new)
This model isn't deployed by any inference provider. 🙋 Ask for provider support.