52 lines
1.8 KiB
JavaScript
52 lines
1.8 KiB
JavaScript
import { BaseRetriever } from "@langchain/core/retrievers";
|
|
import { BM25 } from "../utils/@furkantoprak/bm25/BM25.js";
|
|
/**
 * A retriever that uses the BM25 algorithm to rank documents based on their
 * similarity to a query. It uses the "okapibm25" package for BM25 scoring.
 * The k parameter determines the number of documents to return for each query.
 */
|
|
export class BM25Retriever extends BaseRetriever {
    /**
     * Name used to identify this class.
     *
     * @returns {string} Always "BM25Retriever".
     */
    static lc_name() {
        return "BM25Retriever";
    }

    /**
     * Convenience factory: builds a retriever over `documents`.
     *
     * @param {Array<object>} documents - Documents to rank. Each one is
     *   expected to expose a `pageContent` property (read at query time).
     * @param {object} [options] - Remaining constructor options, e.g. `k`.
     *   A `docs` key in `options` is overridden by `documents`.
     * @returns {BM25Retriever} A new instance of the class this is called on.
     */
    static fromDocuments(documents, options) {
        return new this({ ...options, docs: documents });
    }

    /**
     * @param {object} options - Forwarded unchanged to the base constructor.
     * @param {Array<object>} options.docs - Documents to search over.
     * @param {number} [options.k] - Maximum number of documents returned per
     *   query. When left undefined, `slice(0, undefined)` returns every
     *   document, ordered by score.
     */
    constructor(options) {
        super(options);
        // Fields are defined explicitly (enumerable/configurable/writable) and
        // then assigned, in this order: lc_namespace, docs, k.
        Object.defineProperty(this, "lc_namespace", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: ["langchain", "retrievers", "bm25_retriever"]
        });
        Object.defineProperty(this, "docs", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "k", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.docs = options.docs;
        this.k = options.k;
    }

    /**
     * Turns raw query text into lowercase, whitespace-separated tokens.
     *
     * @param {string} text - Raw query text.
     * @returns {string[]} Non-empty lowercase tokens; an empty array when the
     *   text is empty or contains only whitespace.
     */
    preprocessFunc(text) {
        // split(/\s+/) emits "" for leading/trailing whitespace and for an
        // empty string; those empty tokens must not be passed on as keywords.
        return text
            .toLowerCase()
            .split(/\s+/)
            .filter((token) => token.length > 0);
    }

    /**
     * Scores every stored document against the query with BM25 and returns
     * the highest-scoring ones.
     *
     * @param {string} query - Search query.
     * @returns {Promise<Array<object>>} Up to `k` of the stored documents,
     *   ordered by descending score.
     */
    async _getRelevantDocuments(query) {
        const processedQuery = this.preprocessFunc(query);
        const documents = this.docs.map((doc) => doc.pageContent);
        // BM25 is expected to return one score per input text, index-aligned
        // with `documents` (the pairing below relies on that).
        const scores = BM25(documents, processedQuery);
        const scoredDocs = this.docs.map((doc, index) => ({
            document: doc,
            score: scores[index],
        }));
        // Highest score first.
        scoredDocs.sort((a, b) => b.score - a.score);
        return scoredDocs.slice(0, this.k).map((item) => item.document);
    }
}
|