// Read the user's question from the request body.
// NOTE(review): assumes body-parsing middleware (e.g. express.json()) ran upstream — confirm.
const question = req.body.question;

// Fetch an embedding vector for the question from the local Ollama API.
const embeddingsRequestOptions = {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ model: 'llama2', prompt: question }),
};
const embeddingResponse = await fetch('http://localhost:11434/api/embeddings', embeddingsRequestOptions);
// Fail fast on HTTP errors instead of silently parsing an error payload
// and carrying an undefined embedding into the similarity loop below.
if (!embeddingResponse.ok) {
  throw new Error(`Embedding request failed with status ${embeddingResponse.status}`);
}
const embeddingJson = await embeddingResponse.json();
// xq: the query embedding (array of numbers) used for similarity matching.
const xq = embeddingJson.embedding;
// Find the saved embedding most similar to the query vector xq,
// tracking the best score and its index.
// NOTE(review): assumes maxScore and maxScoreIndex are declared and
// initialized earlier in the file (e.g. maxScore = -Infinity) — confirm.
for (const [index, saved] of savedEmbeddings.entries()) {
  const candidateScore = similarity(xq, saved.values);
  if (candidateScore > maxScore) {
    maxScore = candidateScore;
    maxScoreIndex = index;
  }
}
// Fetch the metadata from the best-matching embedding.
const savedMetaData = savedEmbeddings[maxScoreIndex].metadata;
// Build the augmented (RAG) prompt. The original line was an unterminated
// string literal that also never interpolated its variables; a template
// literal fixes both.
// NOTE(review): if savedMetaData is an object (not a string), interpolate
// JSON.stringify(savedMetaData) instead — confirm the shape of .metadata.
const systemContextQuestion = `SystemPrompt: " ", Context: ${savedMetaData}, Question: ${question}`;
// Request a completion from the local Ollama generate endpoint.
const requestOptions = {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify({ model: 'llama2', prompt: systemContextQuestion }),
};
// Generate a response.
const apiResponse = await fetch('http://localhost:11434/api/generate', requestOptions);
// Surface HTTP failures instead of treating an error payload as a completion.
if (!apiResponse.ok) {
  throw new Error(`Generate request failed with status ${apiResponse.status}`);
}
// Completion payload returned by the model.
const completion = await apiResponse.json();
Using this method we can create embeddings from our data, save them to a local file, and build RAG AI applications without a vector database — all while running a model locally.