agsamantha/node_modules/@langchain/community/dist/vectorstores/usearch.js

188 lines
7.6 KiB
JavaScript
Raw Normal View History

2024-10-02 15:15:21 -05:00
import usearch from "usearch";
import * as uuid from "uuid";
import { SaveableVectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js";
/**
* Class that extends `SaveableVectorStore` and provides methods for
* adding documents and vectors to a `usearch` index, performing
* similarity searches, and saving the index.
*/
export class USearch extends SaveableVectorStore {
_vectorstoreType() {
return "usearch";
}
constructor(embeddings, args) {
super(embeddings, args);
Object.defineProperty(this, "_index", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "_mapping", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "docstore", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "args", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.args = args;
this._index = args.index;
this._mapping = args.mapping ?? {};
this.embeddings = embeddings;
this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore();
}
/**
* Method that adds documents to the `usearch` index. It generates
* embeddings for the documents and adds them to the index.
* @param documents An array of `Document` instances to be added to the index.
* @returns A promise that resolves with an array of document IDs.
*/
async addDocuments(documents) {
const texts = documents.map(({ pageContent }) => pageContent);
return this.addVectors(await this.embeddings.embedDocuments(texts), documents);
}
get index() {
if (!this._index) {
throw new Error("Vector store not initialised yet. Try calling `fromTexts` or `fromDocuments` first.");
}
return this._index;
}
set index(index) {
this._index = index;
}
/**
* Method that adds vectors to the `usearch` index. It also updates the
* mapping between vector IDs and document IDs.
* @param vectors An array of vectors to be added to the index.
* @param documents An array of `Document` instances corresponding to the vectors.
* @returns A promise that resolves with an array of document IDs.
*/
async addVectors(vectors, documents) {
if (vectors.length === 0) {
return [];
}
if (vectors.length !== documents.length) {
throw new Error(`Vectors and documents must have the same length`);
}
const dv = vectors[0].length;
if (!this._index) {
this._index = new usearch.Index({
metric: "l2sq",
connectivity: BigInt(16),
dimensions: BigInt(dv),
});
}
const d = this.index.dimensions();
if (BigInt(dv) !== d) {
throw new Error(`Vectors must have the same length as the number of dimensions (${d})`);
}
const docstoreSize = this.index.size();
const documentIds = [];
for (let i = 0; i < vectors.length; i += 1) {
const documentId = uuid.v4();
documentIds.push(documentId);
const id = Number(docstoreSize) + i;
this.index.add(BigInt(id), new Float32Array(vectors[i]));
this._mapping[id] = documentId;
this.docstore.add({ [documentId]: documents[i] });
}
return documentIds;
}
/**
* Method that performs a similarity search in the `usearch` index. It
* returns the `k` most similar documents to a given query vector, along
* with their similarity scores.
* @param query The query vector.
* @param k The number of most similar documents to return.
* @returns A promise that resolves with an array of tuples, each containing a `Document` and its similarity score.
*/
async similaritySearchVectorWithScore(query, k) {
const d = this.index.dimensions();
if (BigInt(query.length) !== d) {
throw new Error(`Query vector must have the same length as the number of dimensions (${d})`);
}
if (k > this.index.size()) {
const total = this.index.size();
console.warn(`k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}`);
// eslint-disable-next-line no-param-reassign
k = Number(total);
}
const result = this.index.search(new Float32Array(query), BigInt(k));
const return_list = [];
for (let i = 0; i < result.count; i += 1) {
const uuid = this._mapping[Number(result.keys[i])];
return_list.push([this.docstore.search(uuid), result.distances[i]]);
}
return return_list;
}
/**
* Method that saves the `usearch` index and the document store to disk.
* @param directory The directory where the index and document store should be saved.
* @returns A promise that resolves when the save operation is complete.
*/
async save(directory) {
const fs = await import("node:fs/promises");
const path = await import("node:path");
await fs.mkdir(directory, { recursive: true });
await Promise.all([
this.index.save(path.join(directory, "usearch.index")),
await fs.writeFile(path.join(directory, "docstore.json"), JSON.stringify([
Array.from(this.docstore._docs.entries()),
this._mapping,
])),
]);
}
/**
* Static method that creates a new `USearch` instance from a list of
* texts. It generates embeddings for the texts and adds them to the
* `usearch` index.
* @param texts An array of texts to be added to the index.
* @param metadatas Metadata associated with the texts.
* @param embeddings An instance of `Embeddings` used to generate embeddings for the texts.
* @param dbConfig Optional configuration for the document store.
* @returns A promise that resolves with a new `USearch` instance.
*/
static async fromTexts(texts, metadatas, embeddings, dbConfig) {
const docs = [];
for (let i = 0; i < texts.length; i += 1) {
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
const newDoc = new Document({
pageContent: texts[i],
metadata,
});
docs.push(newDoc);
}
return this.fromDocuments(docs, embeddings, dbConfig);
}
/**
* Static method that creates a new `USearch` instance from a list of
* documents. It generates embeddings for the documents and adds them to
* the `usearch` index.
* @param docs An array of `Document` instances to be added to the index.
* @param embeddings An instance of `Embeddings` used to generate embeddings for the documents.
* @param dbConfig Optional configuration for the document store.
* @returns A promise that resolves with a new `USearch` instance.
*/
static async fromDocuments(docs, embeddings, dbConfig) {
const args = {
docstore: dbConfig?.docstore,
};
const instance = new this(embeddings, args);
await instance.addDocuments(docs);
return instance;
}
}