agsamantha/node_modules/@langchain/community/dist/vectorstores/lancedb.js
2024-10-02 15:15:21 -05:00

156 lines
5.6 KiB
JavaScript

import { connect, WriteMode } from "vectordb";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
/**
* A wrapper for an open-source database for vector-search with persistent
* storage. It simplifies retrieval, filtering, and management of
* embeddings.
*/
export class LanceDB extends VectorStore {
    /**
     * @param embeddings Embeddings implementation used to vectorize texts.
     * @param args Optional settings: `table` (an already opened table),
     * `textKey` (record field holding the page content, default "text"),
     * `uri` (passed to `connect`, default "~/lancedb"), `tableName`
     * (default "langchain") and `mode` (write mode used when the table is
     * created, default `WriteMode.Overwrite`).
     */
    constructor(embeddings, args) {
        super(embeddings, args || {});
        // Declare the instance fields as own, enumerable, writable data
        // properties (in this fixed order) before they receive their values.
        for (const field of ["table", "textKey", "uri", "tableName", "mode"]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0,
            });
        }
        this.table = args?.table;
        this.embeddings = embeddings;
        this.textKey = args?.textKey || "text";
        this.uri = args?.uri || "~/lancedb";
        this.tableName = args?.tableName || "langchain";
        this.mode = args?.mode || WriteMode.Overwrite;
    }
    /**
     * Embeds the page content of each document and stores the result.
     * @param documents The documents to be added.
     * @returns A Promise that resolves when the documents have been added.
     */
    async addDocuments(documents) {
        const texts = documents.map(({ pageContent }) => pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents);
    }
    /**
     * @returns The identifier of this vector store implementation.
     */
    _vectorstoreType() {
        return "lancedb";
    }
    /**
     * Adds vectors and their corresponding documents to the database.
     *
     * Each record holds the embedding under `vector`, the page content
     * under `this.textKey`, and one field per metadata entry. Metadata
     * entries named `vector` or `this.textKey` are skipped so they cannot
     * overwrite the embedding or the page content; the search method treats
     * the same two keys as reserved when rebuilding metadata.
     *
     * If no table is set yet, one is created from the records using
     * `this.uri`, `this.tableName` and `this.mode`; otherwise the records
     * are appended to the existing table.
     * @param vectors The vectors to be added.
     * @param documents The corresponding documents to be added.
     * @returns A Promise that resolves when the vectors and documents have been added.
     * @throws Error if `vectors` and `documents` differ in length.
     */
    async addVectors(vectors, documents) {
        if (vectors.length === 0) {
            return;
        }
        if (vectors.length !== documents.length) {
            throw new Error(`Vectors and documents must have the same length`);
        }
        const data = documents.map((doc, i) => {
            const record = {
                vector: vectors[i],
                [this.textKey]: doc.pageContent,
            };
            // A document without metadata contributes no extra fields.
            for (const [metaKey, metaValue] of Object.entries(doc.metadata ?? {})) {
                if (metaKey !== "vector" && metaKey !== this.textKey) {
                    record[metaKey] = metaValue;
                }
            }
            return record;
        });
        if (!this.table) {
            const db = await connect(this.uri);
            this.table = await db.createTable(this.tableName, data, {
                writeMode: this.mode,
            });
            return;
        }
        await this.table.add(data);
    }
    /**
     * Performs a similarity search on the vectors in the database and returns
     * the documents and their scores.
     *
     * Every result field other than `vector`, `score` and `this.textKey`
     * is copied into the document metadata. The score is taken from
     * `score` and, when that field is absent, from `_distance` (the name
     * used by `vectordb` releases that no longer emit `score`). `_distance`
     * is still copied into metadata, as before.
     * @param query The query vector.
     * @param k The number of results to return.
     * @returns A Promise that resolves with an array of tuples, each containing a Document and its score.
     * @throws Error if no table has been set or created yet.
     */
    async similaritySearchVectorWithScore(query, k) {
        if (!this.table) {
            throw new Error("Table not found. Please add vectors to the table first.");
        }
        const results = await this.table.search(query).limit(k).execute();
        const docsAndScore = [];
        results.forEach((item) => {
            const metadata = {};
            for (const key of Object.keys(item)) {
                if (key !== "vector" && key !== "score" && key !== this.textKey) {
                    metadata[key] = item[key];
                }
            }
            const doc = new Document({
                pageContent: item[this.textKey],
                metadata,
            });
            docsAndScore.push([doc, item.score ?? item._distance]);
        });
        return docsAndScore;
    }
    /**
     * Creates a new instance of LanceDB from texts.
     * @param texts The texts to be converted into documents.
     * @param metadatas The metadata for the texts: either an array with one
     * entry per text, or a single value shared by every text.
     * @param embeddings The embeddings to be managed.
     * @param dbConfig The configuration for the LanceDB instance.
     * @returns A Promise that resolves with a new instance of LanceDB.
     */
    static async fromTexts(texts, metadatas, embeddings, dbConfig) {
        const perText = Array.isArray(metadatas);
        const docs = texts.map((text, i) => new Document({
            pageContent: text,
            metadata: perText ? metadatas[i] : metadatas,
        }));
        return LanceDB.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates a new instance of LanceDB from documents.
     * @param docs The documents to be added to the database.
     * @param embeddings The embeddings to be managed.
     * @param dbConfig The configuration for the LanceDB instance.
     * @returns A Promise that resolves with a new instance of LanceDB.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
}