agsamantha/node_modules/@langchain/community/dist/vectorstores/weaviate.js
2024-10-02 15:15:21 -05:00

340 lines
14 KiB
JavaScript

import * as uuid from "uuid";
import { VectorStore, } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
// Note: this function is not generic; it is designed specifically for Weaviate.
// https://weaviate.io/developers/weaviate/config-refs/datatypes#introduction
/**
 * @deprecated Prefer the `@langchain/weaviate` package.
 *
 * Flattens a (possibly nested) metadata object into a single-level object
 * whose keys are the nested paths joined with `_`
 * (e.g. `{ a: { b: 1 } }` becomes `{ a_b: 1 }`).
 *
 * Rules applied to each own enumerable property:
 * - non-array objects are flattened recursively; `null` and objects without
 *   own enumerable keys therefore contribute nothing to the result;
 * - empty arrays are kept as-is;
 * - arrays are kept only when the first element is not an object and every
 *   element has the same `typeof` as the first; any other array is dropped;
 * - every other value is copied unchanged.
 *
 * @param obj Object to flatten. `null`/`undefined` yield an empty object.
 * @returns A new flat object; the input is not modified.
 */
export const flattenObjectForWeaviate = (
// eslint-disable-next-line @typescript-eslint/no-explicit-any
obj) => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const flattenedObject = {};
    for (const key in obj) {
        // Skip properties inherited through the prototype chain.
        if (!Object.hasOwn(obj, key)) {
            continue;
        }
        const value = obj[key];
        if (Array.isArray(value)) {
            // Weaviate only supports arrays of primitive types,
            // where all elements are of the same type. Arrays that do not
            // satisfy this (and are not empty) are intentionally left out.
            const isEmpty = value.length === 0;
            const isHomogeneousPrimitiveArray = !isEmpty &&
                typeof value[0] !== "object" &&
                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                value.every((el) => typeof el === typeof value[0]);
            if (isEmpty || isHomogeneousPrimitiveArray) {
                flattenedObject[key] = value;
            }
        }
        else if (typeof value === "object") {
            // `typeof null === "object"`, so null lands here too and the
            // recursion returns an empty object for it.
            const recursiveResult = flattenObjectForWeaviate(value);
            // Only copy the recursive result's OWN keys. Iterating with
            // Object.entries keeps enumerable properties inherited from
            // Object.prototype out of the flattened output.
            for (const [deepKey, deepValue] of Object.entries(recursiveResult)) {
                flattenedObject[`${key}_${deepKey}`] = deepValue;
            }
        }
        else {
            flattenedObject[key] = value;
        }
    }
    return flattenedObject;
};
/**
 * @deprecated Prefer the `@langchain/weaviate` package.
 *
 * Class that extends the `VectorStore` base class. It provides methods to
 * interact with a Weaviate index, including adding vectors and documents,
 * deleting data, and performing similarity searches.
 */
export class WeaviateStore extends VectorStore {
    /**
     * Identifier of this vector store implementation.
     * @returns The string `"weaviate"`.
     */
    _vectorstoreType() {
        return "weaviate";
    }
    /**
     * @param embeddings Embeddings instance used to embed documents and queries.
     * @param args Store configuration. The fields read here are `client`,
     * `indexName`, `textKey` (defaults to `"text"`), `tenant` and
     * `metadataKeys`.
     */
    constructor(embeddings, args) {
        super(embeddings, args);
        Object.defineProperty(this, "embeddings", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: embeddings
        });
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "indexName", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "textKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "queryAttrs", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "tenant", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.client = args.client;
        this.indexName = args.indexName;
        this.textKey = args.textKey || "text";
        // The text property is always queried; metadata keys are appended below.
        this.queryAttrs = [this.textKey];
        this.tenant = args.tenant;
        if (args.metadataKeys) {
            // https://spec.graphql.org/June2018/#sec-Names
            // queryAttrs need to be valid GraphQL Names
            const validMetadataKeys = args.metadataKeys.filter((k) => {
                const keyIsValid = /^[_A-Za-z][_0-9A-Za-z]*$/.test(k);
                if (!keyIsValid) {
                    console.warn(`Skipping metadata key ${k} as it is not a valid GraphQL Name`);
                }
                return keyIsValid;
            });
            // A Set removes duplicates while keeping first-seen order.
            this.queryAttrs = [...new Set([...this.queryAttrs, ...validMetadataKeys])];
        }
    }
    /**
     * Method to add vectors and corresponding documents to the Weaviate
     * index.
     * @param vectors Array of vectors to be added.
     * @param documents Array of documents corresponding to the vectors.
     * @param options Optional parameter that can include specific IDs for the documents.
     * @returns An array of document IDs (empty when no documents are given).
     * @throws Error if a document's metadata has an own `id` property, or if
     * the batch request fails or reports per-object errors. In the latter
     * case the underlying error is available as `cause`.
     */
    async addVectors(vectors, documents, options) {
        // Nothing to insert: skip the batch request entirely.
        // NOTE(review): the weaviate client is expected to reject a batch
        // with zero objects - confirm against the client version in use.
        if (documents.length === 0) {
            return [];
        }
        const documentIds = options?.ids ?? documents.map(() => uuid.v4());
        const batch = documents.map((document, index) => {
            if (Object.hasOwn(document.metadata, "id")) {
                throw new Error("Document inserted to Weaviate vectorstore should not have `id` in their metadata.");
            }
            const flattenedMetadata = flattenObjectForWeaviate(document.metadata);
            return {
                ...(this.tenant ? { tenant: this.tenant } : {}),
                class: this.indexName,
                id: documentIds[index],
                vector: vectors[index],
                properties: {
                    [this.textKey]: document.pageContent,
                    ...flattenedMetadata,
                },
            };
        });
        try {
            const responses = await this.client.batch
                .objectsBatcher()
                .withObjects(...batch)
                .do();
            // if storing vectors fails, we need to know why
            const errorMessages = [];
            for (const response of responses) {
                const objectErrors = response?.result?.errors?.error;
                if (objectErrors) {
                    errorMessages.push(...objectErrors.map((err) => err.message ??
                        "!! Unfortunately no error message was presented in the API response !!"));
                }
            }
            if (errorMessages.length > 0) {
                throw new Error(errorMessages.join("\n"));
            }
        }
        catch (e) {
            // Keep the original error (and its stack) reachable via `cause`.
            throw new Error(`Error adding vectors: ${e}`, { cause: e });
        }
        return documentIds;
    }
    /**
     * Method to add documents to the Weaviate index. It first generates
     * vectors for the documents using the embeddings, then adds the vectors
     * and documents to the index.
     * @param documents Array of documents to be added.
     * @param options Optional parameter that can include specific IDs for the documents.
     * @returns An array of document IDs.
     */
    async addDocuments(documents, options) {
        const texts = documents.map((d) => d.pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents, options);
    }
    /**
     * Method to delete data from the Weaviate index. It can delete data based
     * on specific IDs or a filter. A non-empty `ids` array takes precedence
     * over `filter`; IDs are deleted one at a time, in order.
     * @param params Object that includes either an array of IDs or a filter for the data to be deleted.
     * @returns Promise that resolves when the deletion is complete.
     * @throws Error if neither a non-empty `ids` array nor a `filter` is given.
     */
    async delete(params) {
        const { ids, filter } = params;
        if (ids && ids.length > 0) {
            for (const id of ids) {
                let deleter = this.client.data
                    .deleter()
                    .withClassName(this.indexName)
                    .withId(id);
                if (this.tenant) {
                    deleter = deleter.withTenant(this.tenant);
                }
                await deleter.do();
            }
        }
        else if (filter) {
            let batchDeleter = this.client.batch
                .objectsBatchDeleter()
                .withClassName(this.indexName)
                .withWhere(filter.where);
            if (this.tenant) {
                batchDeleter = batchDeleter.withTenant(this.tenant);
            }
            await batchDeleter.do();
        }
        else {
            throw new Error(`This method requires either "ids" or "filter" to be set in the input object`);
        }
    }
    /**
     * Method to perform a similarity search on the stored vectors in the
     * Weaviate index. It returns the top k most similar documents and their
     * scores. The score is the `_additional.distance` value reported by
     * Weaviate for each result.
     * @param query The query vector.
     * @param k The number of most similar documents to return.
     * @param filter Optional filter to apply to the search.
     * @returns An array of tuples, where each tuple contains a document and its score.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        const resultsWithEmbedding = await this.similaritySearchVectorWithScoreAndEmbedding(query, k, filter);
        return resultsWithEmbedding.map(([document, score]) => [document, score]);
    }
    /**
     * Method to perform a similarity search on the stored vectors in the
     * Weaviate index. It returns the top k most similar documents, their
     * scores (the `_additional.distance` reported by Weaviate) and embedding
     * vectors.
     * @param query The query vector.
     * @param k The number of most similar documents to return.
     * @param filter Optional filter to apply to the search. Its `distance`
     * is passed to the near-vector clause and its `where`, when set, to the
     * where clause.
     * @returns An array of tuples, where each tuple contains a document, its score and its embedding vector.
     * @throws Error wrapping any failure of the query or of result parsing;
     * the underlying error is available as `cause`.
     */
    async similaritySearchVectorWithScoreAndEmbedding(query, k, filter) {
        try {
            let builder = this.client.graphql
                .get()
                .withClassName(this.indexName)
                .withFields(`${this.queryAttrs.join(" ")} _additional { distance vector }`)
                .withNearVector({
                vector: query,
                distance: filter?.distance,
            })
                .withLimit(k);
            if (this.tenant) {
                builder = builder.withTenant(this.tenant);
            }
            if (filter?.where) {
                builder = builder.withWhere(filter.where);
            }
            const result = await builder.do();
            const documents = [];
            for (const data of result.data.Get[this.indexName]) {
                // Split the text property and Weaviate's `_additional` block
                // from the remaining fields, which become document metadata.
                const { [this.textKey]: text, _additional, ...rest } = data;
                documents.push([
                    new Document({
                        pageContent: text,
                        metadata: rest,
                    }),
                    _additional.distance,
                    _additional.vector,
                ]);
            }
            return documents;
        }
        catch (e) {
            // Keep the original error (and its stack) reachable via `cause`.
            throw new Error(`'Error in similaritySearch' ${e}`, { cause: e });
        }
    }
    /**
     * Return documents selected using the maximal marginal relevance.
     * Maximal marginal relevance optimizes for similarity to the query AND diversity
     * among selected documents.
     *
     * @param {string} query - Text to look up documents similar to.
     * @param {number} options.k - Number of documents to return.
     * @param {number} options.fetchK - Number of documents to fetch before passing to the MMR algorithm. Defaults to 20.
     * @param {number} options.lambda - Number between 0 and 1 that determines the degree of diversity among the results,
     *                 where 0 corresponds to maximum diversity and 1 to minimum diversity. Defaults to 0.5.
     * @param {this["FilterType"]} options.filter - Optional filter
     * @param _callbacks Unused by this implementation.
     *
     * @returns {Promise<Document[]>} - List of documents selected by maximal marginal relevance.
     */
    async maxMarginalRelevanceSearch(query, options, _callbacks) {
        const { k, fetchK = 20, lambda = 0.5, filter } = options;
        const queryEmbedding = await this.embeddings.embedQuery(query);
        const allResults = await this.similaritySearchVectorWithScoreAndEmbedding(queryEmbedding, fetchK, filter);
        const embeddingList = allResults.map(([_doc, _score, embedding]) => embedding);
        const mmrIndexes = maximalMarginalRelevance(queryEmbedding, embeddingList, lambda, k);
        // Entries equal to -1 are skipped; the rest index into allResults.
        return mmrIndexes
            .filter((idx) => idx !== -1)
            .map((idx) => allResults[idx][0]);
    }
    /**
     * Static method to create a new `WeaviateStore` instance from a list of
     * texts. It first creates documents from the texts and metadata, then
     * adds the documents to the Weaviate index.
     * @param texts Array of texts.
     * @param metadatas Metadata for the texts. Can be a single object (shared
     * by every text) or an array of objects (matched to texts by index).
     * @param embeddings Embeddings to be used for the texts.
     * @param args Arguments required to create a new `WeaviateStore` instance.
     * @returns A promise resolving to a new `WeaviateStore` instance.
     */
    static fromTexts(texts, metadatas, embeddings, args) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return WeaviateStore.fromDocuments(docs, embeddings, args);
    }
    /**
     * Static method to create a new `WeaviateStore` instance from a list of
     * documents. It adds the documents to the Weaviate index.
     * @param docs Array of documents.
     * @param embeddings Embeddings to be used for the documents.
     * @param args Arguments required to create a new `WeaviateStore` instance.
     * @returns A new `WeaviateStore` instance.
     */
    static async fromDocuments(docs, embeddings, args) {
        const instance = new this(embeddings, args);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Static method to create a new `WeaviateStore` instance from an existing
     * Weaviate index. No documents are added.
     * @param embeddings Embeddings to be used for the Weaviate index.
     * @param args Arguments required to create a new `WeaviateStore` instance.
     * @returns A new `WeaviateStore` instance.
     */
    static async fromExistingIndex(embeddings, args) {
        return new this(embeddings, args);
    }
}