agsamantha/node_modules/@langchain/community/dist/vectorstores/redis.js
2024-10-02 15:15:21 -05:00

332 lines
13 KiB
JavaScript

import { SchemaFieldTypes, VectorAlgorithms } from "redis";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
/**
 * @deprecated Install and import from the "@langchain/redis" integration package instead.
 * Vector store backed by a Redis instance with the RediSearch module.
 *
 * Documents are written as Redis hashes holding three fields (vector,
 * content and JSON-encoded metadata) and are queried through a KNN search
 * on the configured index. The class also offers helpers to create, check
 * and drop that index.
 */
export class RedisVectorStore extends VectorStore {
    /**
     * Identifier of this vector store implementation.
     * @returns The string "redis".
     */
    _vectorstoreType() {
        return "redis";
    }
    /**
     * @param embeddings Embeddings instance; forwarded to the parent class.
     * @param _dbConfig Store configuration. `redisClient` and `indexName`
     * are read as-is. Optional entries and their fallbacks:
     * `indexOptions` (HNSW algorithm with COSINE distance),
     * `keyPrefix` ("doc:<indexName>:"), `contentKey` ("content"),
     * `metadataKey` ("metadata"), `vectorKey` ("content_vector"),
     * `filter` (no fallback) and `createIndexOptions` (merged over
     * `{ ON: "HASH", PREFIX: keyPrefix }`).
     */
    constructor(embeddings, _dbConfig) {
        super(embeddings, _dbConfig);
        // Declare every instance field up front, in a fixed order, as a plain
        // enumerable/configurable/writable own property.
        const fieldNames = [
            "redisClient",
            "indexName",
            "indexOptions",
            "createIndexOptions",
            "keyPrefix",
            "contentKey",
            "metadataKey",
            "vectorKey",
            "filter",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        this.redisClient = _dbConfig.redisClient;
        this.indexName = _dbConfig.indexName;
        this.indexOptions = _dbConfig.indexOptions ?? {
            ALGORITHM: VectorAlgorithms.HNSW,
            DISTANCE_METRIC: "COSINE",
        };
        this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
        this.contentKey = _dbConfig.contentKey ?? "content";
        this.metadataKey = _dbConfig.metadataKey ?? "metadata";
        this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
        this.filter = _dbConfig.filter;
        // Caller-supplied options win over the HASH/PREFIX defaults.
        this.createIndexOptions = {
            ON: "HASH",
            PREFIX: this.keyPrefix,
            ..._dbConfig.createIndexOptions,
        };
    }
    /**
     * Embeds the page content of each document and stores the resulting
     * vectors together with the documents.
     * @param documents The documents to add.
     * @param options Optional parameters, forwarded to `addVectors`.
     * @returns A promise that resolves when the documents have been added.
     */
    async addDocuments(documents, options) {
        const texts = documents.map(({ pageContent }) => pageContent);
        return this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
    }
    /**
     * Stores vectors and their documents as Redis hashes. The index is
     * created first when it does not exist yet. Writes are grouped into
     * MULTI batches that are executed one after another, so a failing batch
     * rejects the returned promise.
     * @param vectors The vectors to add.
     * @param documents The documents associated with the vectors (by position).
     * @param keys Optional keys for the vectors. When omitted or empty, keys
     * are built as `keyPrefix + (position + current document count of the index)`.
     * @param batchSize Number of hashes per batch. Defaults to 1000; values
     * that are not positive integers fall back to 1000.
     * @returns A promise that resolves when the vectors have been added.
     * @throws Error when `vectors` is empty or its first vector is empty.
     */
    async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
        if (!vectors.length || !vectors[0].length) {
            throw new Error("No vectors provided");
        }
        // check if the index exists and create it if it doesn't
        await this.createIndex(vectors[0].length);
        const info = await this.redisClient.ft.info(this.indexName);
        const lastKeyCount = Number.parseInt(info.numDocs, 10) || 0;
        const hasExplicitKeys = Boolean(keys && keys.length);
        const step = Number.isInteger(batchSize) && batchSize > 0 ? batchSize : 1000;
        for (let start = 0; start < vectors.length; start += step) {
            // A fresh MULTI per batch: a queue that was already executed is never reused.
            const multi = this.redisClient.multi();
            const end = Math.min(start + step, vectors.length);
            for (let idx = start; idx < end; idx += 1) {
                const doc = documents[idx];
                const key = hasExplicitKeys
                    ? keys[idx]
                    : `${this.keyPrefix}${idx + lastKeyCount}`;
                const metadata = doc && doc.metadata ? doc.metadata : {};
                multi.hSet(key, {
                    [this.vectorKey]: this.getFloat32Buffer(vectors[idx]),
                    // Tolerate a missing document the same way metadata does.
                    [this.contentKey]: doc?.pageContent ?? "",
                    [this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
                });
            }
            // Awaited inside the loop so errors propagate to the caller.
            await multi.exec();
        }
    }
    /**
     * Runs a KNN search against the index and returns the matching
     * documents with their scores.
     * @param query The query vector.
     * @param k The number of nearest neighbors to return.
     * @param filter Optional filter to apply to the search. Must not be
     * combined with a filter configured on the instance.
     * @returns A promise that resolves to an array of `[Document, score]` pairs.
     * @throws Error when both `filter` and `this.filter` are set.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        if (filter && this.filter) {
            throw new Error("cannot provide both `filter` and `this.filter`");
        }
        const _filter = filter ?? this.filter;
        const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
        const result = [];
        if (!results.total) {
            return result;
        }
        for (const res of results.documents) {
            const document = res.value;
            // Entries without a value or without a score are skipped.
            if (!document || !document.vector_score) {
                continue;
            }
            result.push([
                new Document({
                    pageContent: (document[this.contentKey] ?? ""),
                    // Read the configured metadata field: it is the one written by
                    // addVectors and requested by buildQuery.
                    metadata: JSON.parse(this.unEscapeSpecialChars((document[this.metadataKey] ?? "{}"))),
                }),
                Number(document.vector_score),
            ]);
        }
        return result;
    }
    /**
     * Builds one document per text and creates a store from them.
     * @param texts The texts to add.
     * @param metadatas Either an array of metadata (matched to texts by
     * position) or a single metadata object shared by all texts.
     * @param embeddings The embeddings to use.
     * @param dbConfig The configuration for the RedisVectorStore.
     * @returns A promise that resolves to a new instance of RedisVectorStore.
     */
    static fromTexts(texts, metadatas, embeddings, dbConfig) {
        const docs = [];
        for (let i = 0; i < texts.length; i += 1) {
            const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
            const newDoc = new Document({
                pageContent: texts[i],
                metadata,
            });
            docs.push(newDoc);
        }
        return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
    }
    /**
     * Creates a store instance and adds the given documents to it.
     * @param docs The documents to add.
     * @param embeddings The embeddings to use.
     * @param dbConfig The configuration for the RedisVectorStore.
     * @returns A promise that resolves to a new instance of RedisVectorStore.
     */
    static async fromDocuments(docs, embeddings, dbConfig) {
        const instance = new this(embeddings, dbConfig);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Checks whether the index exists by issuing FT.INFO for it.
     * @returns A promise that resolves to `true` when FT.INFO succeeds and
     * `false` when it fails for any reason other than an unknown command.
     * @throws Error when the failure message contains "unknown command".
     */
    async checkIndexExists() {
        try {
            await this.redisClient.ft.info(this.indexName);
        }
        catch (err) {
            // The rejected value is not guaranteed to carry a string message.
            const message = typeof err?.message === "string" ? err.message : "";
            if (message.includes("unknown command")) {
                throw new Error("Failed to run FT.INFO command. Please ensure that you are running a RediSearch-capable Redis instance: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/redis#setup");
            }
            // index doesn't exist
            return false;
        }
        return true;
    }
    /**
     * Creates the index with a vector field, a text content field and a
     * text metadata field. Does nothing when the index already exists.
     * @param dimensions The dimension of the vector field. Defaults to 1536.
     * @returns A promise that resolves when the index has been created.
     */
    async createIndex(dimensions = 1536) {
        if (await this.checkIndexExists()) {
            return;
        }
        const schema = {
            [this.vectorKey]: {
                type: SchemaFieldTypes.VECTOR,
                TYPE: "FLOAT32",
                DIM: dimensions,
                // Spread last so configured index options take precedence.
                ...this.indexOptions,
            },
            [this.contentKey]: SchemaFieldTypes.TEXT,
            [this.metadataKey]: SchemaFieldTypes.TEXT,
        };
        await this.redisClient.ft.create(this.indexName, schema, this.createIndexOptions);
    }
    /**
     * Drops the index. Any error raised by the drop command is swallowed
     * and reported through the return value.
     * @param deleteDocuments Optional boolean; when truthy it is passed as the `DD` option.
     * @returns A promise that resolves to `true` when the drop succeeded, `false` otherwise.
     */
    async dropIndex(deleteDocuments) {
        try {
            const options = deleteDocuments ? { DD: deleteDocuments } : undefined;
            await this.redisClient.ft.dropIndex(this.indexName, options);
            return true;
        }
        catch (err) {
            return false;
        }
    }
    /**
     * Deletes vectors from the vector store. Only `deleteAll` is supported:
     * it drops the index with the `DD` option set.
     * @param params The parameters for deleting vectors.
     * @returns A promise that resolves when the vectors have been deleted.
     * @throws Error when `params.deleteAll` is not truthy.
     */
    async delete(params) {
        if (params?.deleteAll) {
            await this.dropIndex(true);
        }
        else {
            throw new Error(`Invalid parameters passed to "delete".`);
        }
    }
    /**
     * Builds the arguments for the search call: the query string and its options.
     * @param query The query vector.
     * @param k The number of nearest neighbors to request.
     * @param filter Optional list of strings matched against the metadata field.
     * @returns A two-element array `[queryString, options]`.
     */
    buildQuery(query, k, filter) {
        const vectorScoreField = "vector_score";
        let hybridFields = "*";
        // if a filter is set, modify the hybrid query
        if (filter && filter.length) {
            // `filter` is a list of strings, then it's applied using the OR operator in the metadata key
            // for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
            hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
        }
        const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
        const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
        const options = {
            PARAMS: {
                vector: this.getFloat32Buffer(query),
            },
            RETURN: returnFields,
            SORTBY: vectorScoreField,
            DIALECT: 2,
            LIMIT: {
                from: 0,
                size: k,
            },
        };
        return [baseQuery, options];
    }
    /**
     * Escapes every filter term and joins the terms with "|".
     * @param filter List of filter strings.
     * @returns The joined filter expression.
     */
    prepareFilter(filter) {
        // Arrow function keeps `this` bound and passes only the term itself.
        return filter.map((term) => this.escapeSpecialChars(term)).join("|");
    }
    /**
     * Escapes all '-' characters.
     * RediSearch considers '-' as a negative operator, hence we need
     * to escape it
     * @see https://redis.io/docs/stack/search/reference/query_syntax
     *
     * @param str The string to escape.
     * @returns The string with every "-" replaced by "\-".
     */
    escapeSpecialChars(str) {
        return str.replaceAll("-", "\\-");
    }
    /**
     * Unescapes all '-' characters, returning the original string
     *
     * @param str The string to unescape.
     * @returns The string with every "\-" replaced by "-".
     */
    unEscapeSpecialChars(str) {
        return str.replaceAll("\\-", "-");
    }
    /**
     * Converts the vector to the buffer Redis needs to
     * correctly store an embedding
     *
     * @param vector Array of numbers.
     * @returns Buffer holding the values as 32-bit floats.
     */
    getFloat32Buffer(vector) {
        return Buffer.from(new Float32Array(vector).buffer);
    }
}