332 lines
13 KiB
JavaScript
332 lines
13 KiB
JavaScript
import { SchemaFieldTypes, VectorAlgorithms } from "redis";
|
|
import { VectorStore } from "@langchain/core/vectorstores";
|
|
import { Document } from "@langchain/core/documents";
|
|
/**
|
|
* @deprecated Install and import from the "@langchain/redis" integration package instead.
|
|
* Class representing a RedisVectorStore. It extends the VectorStore class
|
|
* and includes methods for adding documents and vectors, performing
|
|
* similarity searches, managing the index, and more.
|
|
*/
|
|
export class RedisVectorStore extends VectorStore {
|
|
_vectorstoreType() {
|
|
return "redis";
|
|
}
|
|
constructor(embeddings, _dbConfig) {
|
|
super(embeddings, _dbConfig);
|
|
Object.defineProperty(this, "redisClient", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "indexName", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "indexOptions", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "createIndexOptions", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "keyPrefix", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "contentKey", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "metadataKey", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "vectorKey", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "filter", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
this.redisClient = _dbConfig.redisClient;
|
|
this.indexName = _dbConfig.indexName;
|
|
this.indexOptions = _dbConfig.indexOptions ?? {
|
|
ALGORITHM: VectorAlgorithms.HNSW,
|
|
DISTANCE_METRIC: "COSINE",
|
|
};
|
|
this.keyPrefix = _dbConfig.keyPrefix ?? `doc:${this.indexName}:`;
|
|
this.contentKey = _dbConfig.contentKey ?? "content";
|
|
this.metadataKey = _dbConfig.metadataKey ?? "metadata";
|
|
this.vectorKey = _dbConfig.vectorKey ?? "content_vector";
|
|
this.filter = _dbConfig.filter;
|
|
this.createIndexOptions = {
|
|
ON: "HASH",
|
|
PREFIX: this.keyPrefix,
|
|
..._dbConfig.createIndexOptions,
|
|
};
|
|
}
|
|
/**
|
|
* Method for adding documents to the RedisVectorStore. It first converts
|
|
* the documents to texts and then adds them as vectors.
|
|
* @param documents The documents to add.
|
|
* @param options Optional parameters for adding the documents.
|
|
* @returns A promise that resolves when the documents have been added.
|
|
*/
|
|
async addDocuments(documents, options) {
|
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
return this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
|
|
}
|
|
/**
|
|
* Method for adding vectors to the RedisVectorStore. It checks if the
|
|
* index exists and creates it if it doesn't, then adds the vectors in
|
|
* batches.
|
|
* @param vectors The vectors to add.
|
|
* @param documents The documents associated with the vectors.
|
|
* @param keys Optional keys for the vectors.
|
|
* @param batchSize The size of the batches in which to add the vectors. Defaults to 1000.
|
|
* @returns A promise that resolves when the vectors have been added.
|
|
*/
|
|
async addVectors(vectors, documents, { keys, batchSize = 1000 } = {}) {
|
|
if (!vectors.length || !vectors[0].length) {
|
|
throw new Error("No vectors provided");
|
|
}
|
|
// check if the index exists and create it if it doesn't
|
|
await this.createIndex(vectors[0].length);
|
|
const info = await this.redisClient.ft.info(this.indexName);
|
|
const lastKeyCount = parseInt(info.numDocs, 10) || 0;
|
|
const multi = this.redisClient.multi();
|
|
vectors.map(async (vector, idx) => {
|
|
const key = keys && keys.length
|
|
? keys[idx]
|
|
: `${this.keyPrefix}${idx + lastKeyCount}`;
|
|
const metadata = documents[idx] && documents[idx].metadata
|
|
? documents[idx].metadata
|
|
: {};
|
|
multi.hSet(key, {
|
|
[this.vectorKey]: this.getFloat32Buffer(vector),
|
|
[this.contentKey]: documents[idx].pageContent,
|
|
[this.metadataKey]: this.escapeSpecialChars(JSON.stringify(metadata)),
|
|
});
|
|
// write batch
|
|
if (idx % batchSize === 0) {
|
|
await multi.exec();
|
|
}
|
|
});
|
|
// insert final batch
|
|
await multi.exec();
|
|
}
|
|
/**
|
|
* Method for performing a similarity search in the RedisVectorStore. It
|
|
* returns the documents and their scores.
|
|
* @param query The query vector.
|
|
* @param k The number of nearest neighbors to return.
|
|
* @param filter Optional filter to apply to the search.
|
|
* @returns A promise that resolves to an array of documents and their scores.
|
|
*/
|
|
async similaritySearchVectorWithScore(query, k, filter) {
|
|
if (filter && this.filter) {
|
|
throw new Error("cannot provide both `filter` and `this.filter`");
|
|
}
|
|
const _filter = filter ?? this.filter;
|
|
const results = await this.redisClient.ft.search(this.indexName, ...this.buildQuery(query, k, _filter));
|
|
const result = [];
|
|
if (results.total) {
|
|
for (const res of results.documents) {
|
|
if (res.value) {
|
|
const document = res.value;
|
|
if (document.vector_score) {
|
|
result.push([
|
|
new Document({
|
|
pageContent: (document[this.contentKey] ?? ""),
|
|
metadata: JSON.parse(this.unEscapeSpecialChars((document.metadata ?? "{}"))),
|
|
}),
|
|
Number(document.vector_score),
|
|
]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
/**
|
|
* Static method for creating a new instance of RedisVectorStore from
|
|
* texts. It creates documents from the texts and metadata, then adds them
|
|
* to the RedisVectorStore.
|
|
* @param texts The texts to add.
|
|
* @param metadatas The metadata associated with the texts.
|
|
* @param embeddings The embeddings to use.
|
|
* @param dbConfig The configuration for the RedisVectorStore.
|
|
* @returns A promise that resolves to a new instance of RedisVectorStore.
|
|
*/
|
|
static fromTexts(texts, metadatas, embeddings, dbConfig) {
|
|
const docs = [];
|
|
for (let i = 0; i < texts.length; i += 1) {
|
|
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
|
|
const newDoc = new Document({
|
|
pageContent: texts[i],
|
|
metadata,
|
|
});
|
|
docs.push(newDoc);
|
|
}
|
|
return RedisVectorStore.fromDocuments(docs, embeddings, dbConfig);
|
|
}
|
|
/**
|
|
* Static method for creating a new instance of RedisVectorStore from
|
|
* documents. It adds the documents to the RedisVectorStore.
|
|
* @param docs The documents to add.
|
|
* @param embeddings The embeddings to use.
|
|
* @param dbConfig The configuration for the RedisVectorStore.
|
|
* @returns A promise that resolves to a new instance of RedisVectorStore.
|
|
*/
|
|
static async fromDocuments(docs, embeddings, dbConfig) {
|
|
const instance = new this(embeddings, dbConfig);
|
|
await instance.addDocuments(docs);
|
|
return instance;
|
|
}
|
|
/**
|
|
* Method for checking if an index exists in the RedisVectorStore.
|
|
* @returns A promise that resolves to a boolean indicating whether the index exists.
|
|
*/
|
|
async checkIndexExists() {
|
|
try {
|
|
await this.redisClient.ft.info(this.indexName);
|
|
}
|
|
catch (err) {
|
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
if (err?.message.includes("unknown command")) {
|
|
throw new Error("Failed to run FT.INFO command. Please ensure that you are running a RediSearch-capable Redis instance: https://js.langchain.com/docs/modules/data_connection/vectorstores/integrations/redis#setup");
|
|
}
|
|
// index doesn't exist
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
/**
|
|
* Method for creating an index in the RedisVectorStore. If the index
|
|
* already exists, it does nothing.
|
|
* @param dimensions The dimensions of the index
|
|
* @returns A promise that resolves when the index has been created.
|
|
*/
|
|
async createIndex(dimensions = 1536) {
|
|
if (await this.checkIndexExists()) {
|
|
return;
|
|
}
|
|
const schema = {
|
|
[this.vectorKey]: {
|
|
type: SchemaFieldTypes.VECTOR,
|
|
TYPE: "FLOAT32",
|
|
DIM: dimensions,
|
|
...this.indexOptions,
|
|
},
|
|
[this.contentKey]: SchemaFieldTypes.TEXT,
|
|
[this.metadataKey]: SchemaFieldTypes.TEXT,
|
|
};
|
|
await this.redisClient.ft.create(this.indexName, schema, this.createIndexOptions);
|
|
}
|
|
/**
|
|
* Method for dropping an index from the RedisVectorStore.
|
|
* @param deleteDocuments Optional boolean indicating whether to drop the associated documents.
|
|
* @returns A promise that resolves to a boolean indicating whether the index was dropped.
|
|
*/
|
|
async dropIndex(deleteDocuments) {
|
|
try {
|
|
const options = deleteDocuments ? { DD: deleteDocuments } : undefined;
|
|
await this.redisClient.ft.dropIndex(this.indexName, options);
|
|
return true;
|
|
}
|
|
catch (err) {
|
|
return false;
|
|
}
|
|
}
|
|
/**
|
|
* Deletes vectors from the vector store.
|
|
* @param params The parameters for deleting vectors.
|
|
* @returns A promise that resolves when the vectors have been deleted.
|
|
*/
|
|
async delete(params) {
|
|
if (params.deleteAll) {
|
|
await this.dropIndex(true);
|
|
}
|
|
else {
|
|
throw new Error(`Invalid parameters passed to "delete".`);
|
|
}
|
|
}
|
|
buildQuery(query, k, filter) {
|
|
const vectorScoreField = "vector_score";
|
|
let hybridFields = "*";
|
|
// if a filter is set, modify the hybrid query
|
|
if (filter && filter.length) {
|
|
// `filter` is a list of strings, then it's applied using the OR operator in the metadata key
|
|
// for example: filter = ['foo', 'bar'] => this will filter all metadata containing either 'foo' OR 'bar'
|
|
hybridFields = `@${this.metadataKey}:(${this.prepareFilter(filter)})`;
|
|
}
|
|
const baseQuery = `${hybridFields} => [KNN ${k} @${this.vectorKey} $vector AS ${vectorScoreField}]`;
|
|
const returnFields = [this.metadataKey, this.contentKey, vectorScoreField];
|
|
const options = {
|
|
PARAMS: {
|
|
vector: this.getFloat32Buffer(query),
|
|
},
|
|
RETURN: returnFields,
|
|
SORTBY: vectorScoreField,
|
|
DIALECT: 2,
|
|
LIMIT: {
|
|
from: 0,
|
|
size: k,
|
|
},
|
|
};
|
|
return [baseQuery, options];
|
|
}
|
|
prepareFilter(filter) {
|
|
return filter.map(this.escapeSpecialChars).join("|");
|
|
}
|
|
/**
|
|
* Escapes all '-' characters.
|
|
* RediSearch considers '-' as a negative operator, hence we need
|
|
* to escape it
|
|
* @see https://redis.io/docs/stack/search/reference/query_syntax
|
|
*
|
|
* @param str
|
|
* @returns
|
|
*/
|
|
escapeSpecialChars(str) {
|
|
return str.replaceAll("-", "\\-");
|
|
}
|
|
/**
|
|
* Unescapes all '-' characters, returning the original string
|
|
*
|
|
* @param str
|
|
* @returns
|
|
*/
|
|
unEscapeSpecialChars(str) {
|
|
return str.replaceAll("\\-", "-");
|
|
}
|
|
/**
|
|
* Converts the vector to the buffer Redis needs to
|
|
* correctly store an embedding
|
|
*
|
|
* @param vector
|
|
* @returns Buffer
|
|
*/
|
|
getFloat32Buffer(vector) {
|
|
return Buffer.from(new Float32Array(vector).buffer);
|
|
}
|
|
}
|