agsamantha/node_modules/@langchain/community/dist/utils/@furkantoprak/bm25/BM25.d.ts
2024-10-02 15:15:21 -05:00

37 lines
1.8 KiB
TypeScript

/**
* Adapted from
* https://github.com/FurkanToprak/OkapiBM25
*
* Inlined due to CJS import issues.
*/
/**
* Gets the word count of a string.
* @param corpus - Text whose words are counted.
* @returns The number of words in `corpus`. How words are delimited is decided
* by the implementation, which is not visible in this declaration file.
*/
export declare const getWordCount: (corpus: string) => number;
/**
* Number of occurrences of a word in a string.
* @param term - Word to look for.
* @param corpus - Text that is searched for `term`.
* @returns How many times `term` occurs in `corpus`. Matching rules (case
* sensitivity, tokenisation) are not visible in this declaration file.
*/
export declare const getTermFrequency: (term: string, corpus: string) => number;
/**
* Inverse document frequency of a term across a collection of documents.
* @param term - Word whose inverse document frequency is computed.
* @param documents - Collection of documents the term is measured against.
* @returns The inverse document frequency as a number. The exact formula is
* defined by the implementation, which is not visible in this declaration file.
*/
export declare const getIDF: (term: string, documents: string[]) => number;
/** Represents a document together with its score; useful when sorting results. */
export interface BMDocument {
/** The document that was originally scored. */
document: string;
/** The score that the document receives. */
score: number;
}
/**
* Constants that are free parameters used in BM25. Both fields are optional;
* the documented defaults are given on each field.
*
* NOTE(review): the upstream comment says these are used "specifically when
* generating inverse document frequency". In the textbook Okapi BM25 formula,
* k1 and b appear in the term-frequency / document-length normalisation rather
* than in the IDF term. Confirm against the implementation.
*/
export interface BMConstants {
/** Free parameter. Is 0.75 by default. */
b?: number;
/** Free parameter. Is 1.2 by default. Generally in the range [1.2, 2.0]. */
k1?: number;
}
/**
* Comparator used to order scored documents.
* If it returns a positive number, `secondEl` is placed before `firstEl`;
* otherwise `firstEl` comes before `secondEl`.
* NOTE(review): this reads like the `Array.prototype.sort` compare-function
* convention, but how the implementation applies it is not visible here. Confirm.
*/
export type BMSorter = (firstEl: BMDocument, secondEl: BMDocument) => number;
/**
* Implementation of the Okapi BM25 algorithm.
* @param documents - Collection of documents.
* @param keywords - Query terms.
* @param constants - Contains the free parameters k1 and b. b=0.75 and k1=1.2 by default.
* @param sorter - A function that allows you to sort the results by a given rule.
* If not provided, the results correspond to the original order.
* @returns An array of scores when `sorter` is not provided. When `sorter` is
* provided, the result is not an array of scores but an array of documents with
* their scores. The declared return type is the union of both, so callers have
* to narrow it themselves.
*/
export declare function BM25(documents: string[], keywords: string[], constants?: BMConstants, sorter?: BMSorter): number[] | BMDocument[];