156 lines
5.6 KiB
JavaScript
156 lines
5.6 KiB
JavaScript
import { connect, WriteMode } from "vectordb";
|
|
import { VectorStore } from "@langchain/core/vectorstores";
|
|
import { Document } from "@langchain/core/documents";
|
|
/**
 * A wrapper for an open-source database for vector-search with persistent
 * storage. It simplifies retrieval, filtering, and management of
 * embeddings.
 *
 * Each stored row holds the embedding under `vector`, the document text under
 * the configured `textKey`, and one column per metadata key.
 */
export class LanceDB extends VectorStore {
  /**
   * @param embeddings The embeddings implementation used to embed documents.
   * @param args Optional settings:
   *   - `table`: an already opened table to read from / append to.
   *   - `textKey`: column holding the document text (default `"text"`).
   *   - `uri`: database location passed to `connect` (default `"~/lancedb"`).
   *   - `tableName`: table created on first insert (default `"langchain"`).
   *   - `mode`: write mode used when creating the table
   *     (default `WriteMode.Overwrite`).
   */
  constructor(embeddings, args) {
    super(embeddings, args || {});
    // Declare the instance fields up front with plain data-property
    // descriptors so they always exist, even before a value is assigned.
    for (const field of ["table", "textKey", "uri", "tableName", "mode"]) {
      Object.defineProperty(this, field, {
        enumerable: true,
        configurable: true,
        writable: true,
        value: undefined,
      });
    }
    this.table = args?.table;
    this.embeddings = embeddings;
    this.textKey = args?.textKey || "text";
    this.uri = args?.uri || "~/lancedb";
    this.tableName = args?.tableName || "langchain";
    this.mode = args?.mode || WriteMode.Overwrite;
  }

  /**
   * Adds documents to the database.
   * @param documents The documents to be added.
   * @returns A Promise that resolves when the documents have been added.
   */
  async addDocuments(documents) {
    const texts = documents.map(({ pageContent }) => pageContent);
    const vectors = await this.embeddings.embedDocuments(texts);
    return this.addVectors(vectors, documents);
  }

  /**
   * @returns The identifier of this vector store type.
   */
  _vectorstoreType() {
    return "lancedb";
  }

  /**
   * Adds vectors and their corresponding documents to the database.
   *
   * When no table is attached yet, connects to `uri` and creates `tableName`
   * from this first batch using the configured write mode; otherwise the rows
   * are appended to the attached table. An empty `vectors` array is a no-op.
   *
   * @param vectors The vectors to be added.
   * @param documents The corresponding documents to be added.
   * @returns A Promise that resolves when the vectors and documents have been added.
   * @throws Error if `vectors` and `documents` differ in length.
   */
  async addVectors(vectors, documents) {
    if (vectors.length === 0) {
      return;
    }
    if (vectors.length !== documents.length) {
      throw new Error(`Vectors and documents must have the same length`);
    }

    const data = documents.map((doc, i) => {
      const record = {
        vector: vectors[i],
        [this.textKey]: doc.pageContent,
      };
      // Metadata keys become top-level columns. They are copied last, so a
      // metadata key named "vector" or equal to `textKey` overwrites that
      // column. Documents without metadata are treated as having none.
      const metadata = doc.metadata ?? {};
      for (const metaKey of Object.keys(metadata)) {
        record[metaKey] = metadata[metaKey];
      }
      return record;
    });

    if (!this.table) {
      const db = await connect(this.uri);
      this.table = await db.createTable(this.tableName, data, {
        writeMode: this.mode,
      });
      return;
    }
    await this.table.add(data);
  }

  /**
   * Performs a similarity search on the vectors in the database and returns
   * the documents and their scores.
   *
   * Every column other than `vector`, `score` and the text column is returned
   * as document metadata.
   *
   * @param query The query vector.
   * @param k The number of results to return.
   * @returns A Promise that resolves with an array of tuples, each containing a Document and its score.
   * @throws Error if no table is attached yet.
   */
  async similaritySearchVectorWithScore(query, k) {
    if (!this.table) {
      throw new Error("Table not found. Please add vectors to the table first.");
    }
    const results = await this.table.search(query).limit(k).execute();
    const docsAndScore = [];
    results.forEach((item) => {
      const metadata = {};
      for (const key of Object.keys(item)) {
        if (key !== "vector" && key !== "score" && key !== this.textKey) {
          metadata[key] = item[key];
        }
      }
      // NOTE(review): the LanceDB client appears to report the distance under
      // `_distance` rather than `score` in some versions - confirm against the
      // installed client. Fall back to it so the tuple's score is not
      // undefined; `_distance` is still kept in metadata as before.
      const score = item.score ?? item._distance;
      docsAndScore.push([
        new Document({
          pageContent: item[this.textKey],
          metadata,
        }),
        score,
      ]);
    });
    return docsAndScore;
  }

  /**
   * Creates a new instance of LanceDB from texts.
   * @param texts The texts to be converted into documents.
   * @param metadatas The metadata for the texts: either an array aligned with
   *   `texts`, or a single object shared by every text.
   * @param embeddings The embeddings to be managed.
   * @param dbConfig The configuration for the LanceDB instance.
   * @returns A Promise that resolves with a new instance of LanceDB.
   */
  static async fromTexts(texts, metadatas, embeddings, dbConfig) {
    const docs = texts.map((pageContent, i) => {
      const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
      return new Document({ pageContent, metadata });
    });
    return LanceDB.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Creates a new instance of LanceDB from documents.
   * @param docs The documents to be added to the database.
   * @param embeddings The embeddings to be managed.
   * @param dbConfig The configuration for the LanceDB instance.
   * @returns A Promise that resolves with a new instance of LanceDB.
   */
  static async fromDocuments(docs, embeddings, dbConfig) {
    const instance = new this(embeddings, dbConfig);
    await instance.addDocuments(docs);
    return instance;
  }
}
|