188 lines
7.6 KiB
JavaScript
188 lines
7.6 KiB
JavaScript
|
import usearch from "usearch";
|
||
|
import * as uuid from "uuid";
|
||
|
import { SaveableVectorStore } from "@langchain/core/vectorstores";
|
||
|
import { Document } from "@langchain/core/documents";
|
||
|
import { SynchronousInMemoryDocstore } from "../stores/doc/in_memory.js";
|
||
|
/**
 * Vector store backed by a `usearch` index.
 *
 * Extends `SaveableVectorStore` and provides methods for adding documents
 * and vectors to the index, performing similarity searches, and saving the
 * index together with its document store.
 *
 * Every vector is stored in the index under a numeric key; `_mapping`
 * translates that key to the UUID under which the matching `Document` is
 * kept in `docstore`.
 */
export class USearch extends SaveableVectorStore {
  _vectorstoreType() {
    return "usearch";
  }

  /**
   * @param embeddings Embeddings instance; `addDocuments` uses it to embed
   * document text.
   * @param args Optional settings. Recognised keys: `index` (an existing
   * `usearch` index), `mapping` (numeric index key -> document ID) and
   * `docstore`. A fresh `SynchronousInMemoryDocstore` is used when no
   * docstore is supplied.
   */
  constructor(embeddings, args) {
    super(embeddings, args);
    // `args` may be omitted entirely, so every read goes through `?.`.
    this._index = args?.index;
    this._mapping = args?.mapping ?? {};
    this.docstore = args?.docstore ?? new SynchronousInMemoryDocstore();
    this.args = args;
    this.embeddings = embeddings;
  }

  /**
   * Method that adds documents to the `usearch` index. It generates
   * embeddings for the documents and adds them to the index.
   * @param documents An array of `Document` instances to be added to the index.
   * @returns A promise that resolves with an array of document IDs.
   */
  async addDocuments(documents) {
    const texts = documents.map(({ pageContent }) => pageContent);
    const vectors = await this.embeddings.embedDocuments(texts);
    return this.addVectors(vectors, documents);
  }

  /**
   * The underlying `usearch` index.
   * @throws Error if no index has been supplied or created yet.
   */
  get index() {
    if (!this._index) {
      throw new Error("Vector store not initialised yet. Try calling `fromTexts` or `fromDocuments` first.");
    }
    return this._index;
  }

  set index(index) {
    this._index = index;
  }

  /**
   * Method that adds vectors to the `usearch` index. It also updates the
   * mapping between vector IDs and document IDs.
   *
   * The index is created lazily on the first call, sized to the length of
   * the first vector. All vectors are validated against the index
   * dimensions before anything is written, so a bad vector cannot leave the
   * index, mapping and docstore partially updated.
   * @param vectors An array of vectors to be added to the index.
   * @param documents An array of `Document` instances corresponding to the vectors.
   * @returns A promise that resolves with an array of document IDs.
   * @throws Error if `vectors` and `documents` differ in length, or if any
   * vector's length differs from the index dimensions.
   */
  async addVectors(vectors, documents) {
    if (vectors.length === 0) {
      return [];
    }
    if (vectors.length !== documents.length) {
      throw new Error(`Vectors and documents must have the same length`);
    }
    const dv = vectors[0].length;
    if (!this._index) {
      this._index = new usearch.Index({
        metric: "l2sq",
        connectivity: BigInt(16),
        dimensions: BigInt(dv),
      });
    }
    const d = this.index.dimensions();
    // Compare as Numbers so the check holds whether the binding reports
    // the dimension count as a number or as a bigint.
    const expectedLength = Number(d);
    if (vectors.some((vector) => vector.length !== expectedLength)) {
      throw new Error(`Vectors must have the same length as the number of dimensions (${d})`);
    }
    // New keys continue from the current index size.
    const firstKey = Number(this.index.size());
    const documentIds = [];
    for (let i = 0; i < vectors.length; i += 1) {
      const documentId = uuid.v4();
      const key = firstKey + i;
      this.index.add(BigInt(key), new Float32Array(vectors[i]));
      this._mapping[key] = documentId;
      this.docstore.add({ [documentId]: documents[i] });
      documentIds.push(documentId);
    }
    return documentIds;
  }

  /**
   * Method that performs a similarity search in the `usearch` index. It
   * returns up to `k` documents closest to a given query vector, each
   * paired with the distance reported by the index.
   *
   * If `k` exceeds the number of indexed elements, a warning is logged and
   * the index size is used instead.
   * @param query The query vector.
   * @param k The number of most similar documents to return.
   * @returns A promise that resolves with an array of `[Document, distance]` tuples.
   * @throws Error if the query length differs from the index dimensions.
   */
  async similaritySearchVectorWithScore(query, k) {
    const d = this.index.dimensions();
    if (query.length !== Number(d)) {
      throw new Error(`Query vector must have the same length as the number of dimensions (${d})`);
    }
    const total = Number(this.index.size());
    let limit = k;
    if (limit > total) {
      console.warn(`k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}`);
      limit = total;
    }
    const result = this.index.search(new Float32Array(query), BigInt(limit));
    const matches = [];
    for (let i = 0; i < result.count; i += 1) {
      // Index keys come back from the native binding; map each one to the
      // document ID it was registered under in `addVectors`.
      const documentId = this._mapping[Number(result.keys[i])];
      matches.push([this.docstore.search(documentId), result.distances[i]]);
    }
    return matches;
  }

  /**
   * Method that saves the `usearch` index and the document store to disk.
   *
   * Writes `usearch.index` (the index itself) and `docstore.json` (a JSON
   * array holding the docstore entries followed by the key -> document ID
   * mapping) into `directory`, creating the directory if needed.
   * @param directory The directory where the index and document store should be saved.
   * @returns A promise that resolves when the save operation is complete.
   */
  async save(directory) {
    const fs = await import("node:fs/promises");
    const path = await import("node:path");
    await fs.mkdir(directory, { recursive: true });
    const serialisedDocstore = JSON.stringify([
      Array.from(this.docstore._docs.entries()),
      this._mapping,
    ]);
    // Both operations are handed to Promise.all un-awaited so they are
    // awaited together rather than one after the other.
    await Promise.all([
      this.index.save(path.join(directory, "usearch.index")),
      fs.writeFile(path.join(directory, "docstore.json"), serialisedDocstore),
    ]);
  }

  /**
   * Static method that creates a new `USearch` instance from a list of
   * texts. It generates embeddings for the texts and adds them to the
   * `usearch` index.
   * @param texts An array of texts to be added to the index.
   * @param metadatas Metadata associated with the texts: either one entry per text (array) or a single object shared by all texts.
   * @param embeddings An instance of `Embeddings` used to generate embeddings for the texts.
   * @param dbConfig Optional configuration for the document store.
   * @returns A promise that resolves with a new `USearch` instance.
   */
  static async fromTexts(texts, metadatas, embeddings, dbConfig) {
    const hasPerTextMetadata = Array.isArray(metadatas);
    const docs = texts.map(
      (pageContent, i) =>
        new Document({
          pageContent,
          metadata: hasPerTextMetadata ? metadatas[i] : metadatas,
        })
    );
    return this.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Static method that creates a new `USearch` instance from a list of
   * documents. It generates embeddings for the documents and adds them to
   * the `usearch` index.
   * @param docs An array of `Document` instances to be added to the index.
   * @param embeddings An instance of `Embeddings` used to generate embeddings for the documents.
   * @param dbConfig Optional configuration for the document store; only its `docstore` property is read.
   * @returns A promise that resolves with a new `USearch` instance.
   */
  static async fromDocuments(docs, embeddings, dbConfig) {
    const args = {
      docstore: dbConfig?.docstore,
    };
    const instance = new this(embeddings, args);
    await instance.addDocuments(docs);
    return instance;
  }
}
|