/*
 * agsamantha/node_modules/@langchain/community/dist/vectorstores/usearch.cjs
 *
 * 218 lines
 * 8.9 KiB
 * JavaScript
 * Raw Normal View History
 *
 * 2024-10-02 15:15:21 -05:00
 */
"use strict";
/**
 * Compiler-emitted interop helper: exposes property `key` of `source` on
 * `target` under the name `alias` (or `key` when no alias is given).
 *
 * When `Object.create` is available the property is installed through
 * `Object.defineProperty`. The source's own descriptor is reused unless it is
 * missing, is an accessor on a non-`__esModule` source, or is a data
 * descriptor that is writable or configurable; in those cases an enumerable
 * getter that reads `source[key]` on every access is installed instead.
 * Without `Object.create` the current value is simply copied.
 *
 * An already-defined `this.__createBinding` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        target[exportName] = source[key];
    });
/**
 * Compiler-emitted interop helper: stores `value` as the `default` property
 * of `target`.
 *
 * With `Object.create` available the property is defined as enumerable (and,
 * by `defineProperty` defaults, non-writable and non-configurable); otherwise
 * it is a plain assignment. An already-defined `this.__setModuleDefault`
 * takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        var descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        target["default"] = value;
    });
/**
 * Compiler-emitted interop helper for namespace imports.
 *
 * A module flagged with `__esModule` is returned unchanged. Any other value
 * is wrapped in a fresh object: every own enumerable key except `"default"`
 * is bound onto the wrapper via `__createBinding`, and the original value is
 * attached as `default` via `__setModuleDefault`. An already-defined
 * `this.__importStar` takes precedence.
 */
var __importStar = (this && this.__importStar) || function (mod) {
    var alreadyEsModule = mod && mod.__esModule;
    if (alreadyEsModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        var hasOwn = Object.prototype.hasOwnProperty;
        for (var key in mod) {
            if (key === "default" || !hasOwn.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
/**
 * Compiler-emitted interop helper for default imports.
 *
 * A module flagged with `__esModule` is returned unchanged; any other value
 * is wrapped as `{ default: value }`. An already-defined
 * `this.__importDefault` takes precedence.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag the exports object with `__esModule`; the interop helpers in this file
// return objects carrying that flag unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export slot as undefined; it is assigned the class at the end of the file.
exports.USearch = void 0;
const usearch_1 = __importDefault(require("usearch"));
const uuid = __importStar(require("uuid"));
const vectorstores_1 = require("@langchain/core/vectorstores");
const documents_1 = require("@langchain/core/documents");
const in_memory_js_1 = require("../stores/doc/in_memory.cjs");
/**
 * Class that extends `SaveableVectorStore` and provides methods for
 * adding documents and vectors to a `usearch` index, performing
 * similarity searches, and saving the index.
 *
 * Internally the store keeps three pieces of state in step:
 *  - `_index`   – the `usearch` index holding the vectors under numeric keys,
 *  - `_mapping` – numeric index key -> generated document id,
 *  - `docstore` – document id -> `Document`.
 */
class USearch extends vectorstores_1.SaveableVectorStore {
    /**
     * @returns The identifier of this vector store type, `"usearch"`.
     */
    _vectorstoreType() {
        return "usearch";
    }
    /**
     * @param embeddings Embeddings instance used to embed documents.
     * @param args Optional settings: `index` (an existing `usearch` index),
     *   `mapping` (index key -> document id) and `docstore`. Any field that is
     *   missing falls back to: no index yet, an empty mapping, and a new
     *   `SynchronousInMemoryDocstore`.
     */
    constructor(embeddings, args) {
        super(embeddings, args);
        Object.defineProperty(this, "_index", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_mapping", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "docstore", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "args", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.args = args;
        // `args` is read with optional chaining throughout so that omitting it
        // behaves the same as passing an empty object.
        this._index = args?.index;
        this._mapping = args?.mapping ?? {};
        this.embeddings = embeddings;
        this.docstore = args?.docstore ?? new in_memory_js_1.SynchronousInMemoryDocstore();
    }
    /**
     * Method that adds documents to the `usearch` index. It generates
     * embeddings for the documents and adds them to the index.
     * @param documents An array of `Document` instances to be added to the index.
     * @returns A promise that resolves with an array of document IDs.
     */
    async addDocuments(documents) {
        const texts = documents.map(({ pageContent }) => pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents);
    }
    /**
     * The underlying `usearch` index.
     * @throws Error if no index has been supplied or created yet.
     */
    get index() {
        if (!this._index) {
            throw new Error("Vector store not initialised yet. Try calling `fromTexts` or `fromDocuments` first.");
        }
        return this._index;
    }
    set index(index) {
        this._index = index;
    }
    /**
     * Method that adds vectors to the `usearch` index. It also updates the
     * mapping between vector IDs and document IDs.
     *
     * If the store has no index yet, one is created with the `l2sq` metric,
     * connectivity 16 and as many dimensions as the first vector has.
     * @param vectors An array of vectors to be added to the index.
     * @param documents An array of `Document` instances corresponding to the vectors.
     * @returns A promise that resolves with an array of document IDs.
     * @throws Error if `vectors` and `documents` differ in length, if the
     *   vectors are not all the same length, or if that length does not match
     *   the dimensionality of the index.
     */
    async addVectors(vectors, documents) {
        if (vectors.length === 0) {
            return [];
        }
        if (vectors.length !== documents.length) {
            throw new Error(`Vectors and documents must have the same length`);
        }
        const dv = vectors[0].length;
        // Reject ragged batches before touching any state, so a bad vector in
        // the middle of the batch cannot leave the index, mapping and docstore
        // partially updated.
        if (vectors.some((vector) => vector.length !== dv)) {
            throw new Error(`Vectors must all have the same length (${dv})`);
        }
        if (!this._index) {
            this._index = new usearch_1.default.Index({
                metric: "l2sq",
                connectivity: BigInt(16),
                dimensions: BigInt(dv),
            });
        }
        const d = this.index.dimensions();
        if (BigInt(dv) !== d) {
            throw new Error(`Vectors must have the same length as the number of dimensions (${d})`);
        }
        // New entries are keyed sequentially, starting at the current index size.
        const firstId = Number(this.index.size());
        const documentIds = [];
        for (let i = 0; i < vectors.length; i += 1) {
            const documentId = uuid.v4();
            documentIds.push(documentId);
            const id = firstId + i;
            this.index.add(BigInt(id), new Float32Array(vectors[i]));
            this._mapping[id] = documentId;
            this.docstore.add({ [documentId]: documents[i] });
        }
        return documentIds;
    }
    /**
     * Method that performs a similarity search in the `usearch` index. It
     * returns up to `k` documents for a given query vector, each paired with
     * the corresponding entry of the search result's `distances`.
     *
     * If `k` exceeds the number of entries in the index, a warning is logged
     * and the index size is used instead.
     * @param query The query vector.
     * @param k The number of most similar documents to return.
     * @returns A promise that resolves with an array of tuples, each containing a `Document` and its score.
     * @throws Error if the store has no index or `query` does not match the index dimensionality.
     */
    async similaritySearchVectorWithScore(query, k) {
        const d = this.index.dimensions();
        if (BigInt(query.length) !== d) {
            throw new Error(`Query vector must have the same length as the number of dimensions (${d})`);
        }
        const total = this.index.size();
        let limit = k;
        if (limit > total) {
            console.warn(`k (${k}) is greater than the number of elements in the index (${total}), setting k to ${total}`);
            limit = Number(total);
        }
        const result = this.index.search(new Float32Array(query), BigInt(limit));
        const matches = [];
        for (let i = 0; i < result.count; i += 1) {
            // Translate the numeric index key back to the generated document id.
            const documentId = this._mapping[Number(result.keys[i])];
            matches.push([this.docstore.search(documentId), result.distances[i]]);
        }
        return matches;
    }
    /**
     * Method that saves the `usearch` index and the document store to disk.
     *
     * Two files are written into `directory` (created if missing):
     * `usearch.index` via the index's own `save`, and `docstore.json`
     * containing `[docstore entries, mapping]`.
     * NOTE(review): reads `this.docstore._docs.entries()`, so the docstore is
     * assumed to expose a Map-like `_docs`; confirm for custom docstores.
     * @param directory The directory where the index and document store should be saved.
     * @returns A promise that resolves when the save operation is complete.
     */
    async save(directory) {
        const fs = await import("node:fs/promises");
        const path = await import("node:path");
        await fs.mkdir(directory, { recursive: true });
        const serializedDocstore = JSON.stringify([
            Array.from(this.docstore._docs.entries()),
            this._mapping,
        ]);
        // Both operations are handed to Promise.all un-awaited so that it
        // observes (and propagates) a failure from either of them.
        await Promise.all([
            this.index.save(path.join(directory, "usearch.index")),
            fs.writeFile(path.join(directory, "docstore.json"), serializedDocstore),
        ]);
    }
    /**
     * Static method that creates a new `USearch` instance from a list of
     * texts. It generates embeddings for the texts and adds them to the
     * `usearch` index.
     * @param texts An array of texts to be added to the index.
     * @param metadatas Metadata associated with the texts: either an array
     *   (one entry per text, matched by position) or a single object shared by all texts.
     * @param embeddings An instance of `Embeddings` used to generate embeddings for the texts.
     * @param dbConfig Optional configuration for the document store.
     * @returns A promise that resolves with a new `USearch` instance.
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new documents_1.Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return this.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Static method that creates a new `USearch` instance from a list of
     * documents. It generates embeddings for the documents and adds them to
     * the `usearch` index.
     * @param docs An array of `Document` instances to be added to the index.
     * @param embeddings An instance of `Embeddings` used to generate embeddings for the documents.
     * @param dbConfig Optional configuration; only its `docstore` field is used.
     * @returns A promise that resolves with a new `USearch` instance.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const args = {
            docstore: dbConfig?.docstore,
        };
        const instance = new this(embeddings, args);
        await instance.addDocuments(docs);
        return instance;
    }
}
exports.USearch = USearch;