agsamantha/node_modules/@langchain/textsplitters/dist/text_splitter.cjs

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.LatexTextSplitter = exports.MarkdownTextSplitter = exports.TokenTextSplitter = exports.RecursiveCharacterTextSplitter = exports.SupportedTextSplitterLanguages = exports.CharacterTextSplitter = exports.TextSplitter = void 0;
const documents_1 = require("@langchain/core/documents");
const tiktoken_1 = require("@langchain/core/utils/tiktoken");
/**
 * Base class for text splitters.
 *
 * This class does not define `splitText(text)` itself; `createDocuments`
 * calls `this.splitText`, so concrete subclasses have to provide it. What is
 * implemented here are the shared building blocks: splitting on a separator,
 * merging small pieces into size-bounded chunks with overlap, and wrapping
 * chunks into `Document`s annotated with line numbers.
 */
class TextSplitter extends documents_1.BaseDocumentTransformer {
    /**
     * @param {object} [fields] Optional settings (also passed to the base class).
     * @param {number} [fields.chunkSize=1000] Target maximum chunk length, measured with `lengthFunction`.
     * @param {number} [fields.chunkOverlap=200] Maximum length kept from the end of one chunk at the start of the next.
     * @param {boolean} [fields.keepSeparator=false] Keep separators inside the split pieces instead of dropping them.
     * @param {Function} [fields.lengthFunction] Measures a piece of text (may be async); defaults to `text.length`.
     * @throws {Error} When `chunkOverlap >= chunkSize`.
     */
    constructor(fields) {
        super(fields);
        Object.defineProperty(this, "lc_namespace", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: ["langchain", "document_transformers", "text_splitters"]
        });
        Object.defineProperty(this, "chunkSize", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 1000
        });
        Object.defineProperty(this, "chunkOverlap", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 200
        });
        Object.defineProperty(this, "keepSeparator", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: false
        });
        Object.defineProperty(this, "lengthFunction", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.chunkSize = fields?.chunkSize ?? this.chunkSize;
        this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;
        this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;
        this.lengthFunction =
            fields?.lengthFunction ?? ((text) => text.length);
        if (this.chunkOverlap >= this.chunkSize) {
            throw new Error("Cannot have chunkOverlap >= chunkSize");
        }
    }
    /**
     * Document-transformer entry point; simply delegates to `splitDocuments`.
     *
     * @param {Array} documents Documents to split.
     * @param {object} [chunkHeaderOptions] See `createDocuments`.
     * @returns {Promise<Array>} The chunked documents.
     */
    async transformDocuments(documents, chunkHeaderOptions = {}) {
        return this.splitDocuments(documents, chunkHeaderOptions);
    }
    /**
     * Splits `text` on `separator` and drops empty pieces.
     *
     * With `keepSeparator` set, the split happens *before* each separator
     * (regex lookahead), so every separator stays at the start of the piece
     * that follows it. A falsy separator splits into single characters.
     *
     * @param {string} text Text to split.
     * @param {string} separator Literal separator (not a regex).
     * @returns {string[]} Non-empty pieces in order.
     */
    splitOnSeparator(text, separator) {
        let splits;
        if (separator) {
            if (this.keepSeparator) {
                // Escape regex metacharacters so the separator is matched literally.
                const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()|[\]{}]/g, "\\$&");
                splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
            }
            else {
                splits = text.split(separator);
            }
        }
        else {
            splits = text.split("");
        }
        return splits.filter((s) => s !== "");
    }
    /**
     * Splits every text with `this.splitText` and wraps each chunk in a
     * `Document` whose metadata carries `loc.lines = { from, to }` (1-based
     * line numbers of the chunk inside its source text).
     *
     * @param {string[]} texts Texts to split.
     * @param {object[]} [metadatas] Metadata per text (same index as `texts`);
     *   when empty, an empty object is used for every text.
     * @param {object} [chunkHeaderOptions]
     * @param {string} [chunkHeaderOptions.chunkHeader=""] Prepended to every chunk.
     * @param {string} [chunkHeaderOptions.chunkOverlapHeader="(cont'd) "] Inserted after
     *   `chunkHeader` on every chunk except the first one of a text.
     * @param {boolean} [chunkHeaderOptions.appendChunkOverlapHeader=false] Enables `chunkOverlapHeader`.
     * @returns {Promise<Array>} One `Document` per chunk, in order.
     */
    async createDocuments(texts, metadatas = [], chunkHeaderOptions = {}) {
        // if no metadata is provided, we create an empty one for each text
        const _metadatas = metadatas.length > 0
            ? metadatas
            : [...Array(texts.length)].map(() => ({}));
        const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
        const documents = [];
        for (let i = 0; i < texts.length; i += 1) {
            const text = texts[i];
            let lineCounterIndex = 1;
            let prevChunk = null;
            let indexPrevChunk = -1;
            for (const chunk of await this.splitText(text)) {
                let pageContent = chunkHeader;
                // Character offset just past the previous chunk (0 before the first
                // chunk). Offsets into `text` are character positions, so this has to
                // use the string length rather than `lengthFunction`, which may
                // count a different unit (for example tokens).
                const indexEndPrevChunk = prevChunk === null ? 0 : indexPrevChunk + prevChunk.length;
                // we need to count the \n that are in the text before getting removed by the splitting
                const foundAt = text.indexOf(chunk, indexPrevChunk + 1);
                // A chunk is not always a verbatim substring of the text (empty
                // pieces are dropped before pieces are re-joined). In that case
                // assume it starts where the previous chunk ended instead of
                // feeding -1 into slice(), which would count from the end.
                const indexChunk = foundAt === -1 ? indexEndPrevChunk : foundAt;
                if (prevChunk === null) {
                    // Newlines skipped before the first chunk.
                    lineCounterIndex += this.numberOfNewLines(text, 0, indexChunk);
                }
                else {
                    if (indexEndPrevChunk < indexChunk) {
                        // Gap between the chunks: move forward by the newlines in it.
                        lineCounterIndex += this.numberOfNewLines(text, indexEndPrevChunk, indexChunk);
                    }
                    else if (indexEndPrevChunk > indexChunk) {
                        // Overlap: step back by the newlines shared with the previous chunk.
                        lineCounterIndex -= this.numberOfNewLines(text, indexChunk, indexEndPrevChunk);
                    }
                    if (appendChunkOverlapHeader) {
                        pageContent += chunkOverlapHeader;
                    }
                }
                const newLinesCount = this.numberOfNewLines(chunk);
                // Copy an existing `loc` object so the caller's metadata is not mutated.
                const loc = _metadatas[i].loc && typeof _metadatas[i].loc === "object"
                    ? { ..._metadatas[i].loc }
                    : {};
                loc.lines = {
                    from: lineCounterIndex,
                    to: lineCounterIndex + newLinesCount,
                };
                const metadataWithLinesNumber = {
                    ..._metadatas[i],
                    loc,
                };
                pageContent += chunk;
                documents.push(new documents_1.Document({
                    pageContent,
                    metadata: metadataWithLinesNumber,
                }));
                lineCounterIndex += newLinesCount;
                prevChunk = chunk;
                indexPrevChunk = indexChunk;
            }
        }
        return documents;
    }
    /**
     * Counts "\n" characters in `text.slice(start, end)`; omitted bounds
     * cover the whole string.
     *
     * @param {string} text
     * @param {number} [start]
     * @param {number} [end]
     * @returns {number}
     */
    numberOfNewLines(text, start, end) {
        const textSection = text.slice(start, end);
        return (textSection.match(/\n/g) || []).length;
    }
    /**
     * Splits existing documents, carrying each document's metadata over to
     * its chunks. Documents whose `pageContent` is `undefined` are skipped.
     *
     * @param {Array} documents Documents to split.
     * @param {object} [chunkHeaderOptions] See `createDocuments`.
     * @returns {Promise<Array>} The chunked documents.
     */
    async splitDocuments(documents, chunkHeaderOptions = {}) {
        const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
        const texts = selectedDocuments.map((doc) => doc.pageContent);
        const metadatas = selectedDocuments.map((doc) => doc.metadata);
        return this.createDocuments(texts, metadatas, chunkHeaderOptions);
    }
    /**
     * Joins pieces with `separator` and trims the result.
     *
     * @param {string[]} docs
     * @param {string} separator
     * @returns {string|null} The joined text, or `null` when it is empty after trimming.
     */
    joinDocs(docs, separator) {
        const text = docs.join(separator).trim();
        return text === "" ? null : text;
    }
    /**
     * Packs consecutive pieces into chunks of at most `chunkSize` (pieces
     * measured with `lengthFunction`, separators with `separator.length`),
     * keeping up to `chunkOverlap` worth of trailing pieces as the start of
     * the next chunk.
     *
     * @param {string[]} splits Pieces in document order.
     * @param {string} separator String re-inserted between pieces of a chunk.
     * @returns {Promise<string[]>} The merged chunks.
     */
    async mergeSplits(splits, separator) {
        const docs = [];
        const currentDoc = [];
        // Sum of the piece lengths in currentDoc (separators not included).
        let total = 0;
        for (const d of splits) {
            const _len = await this.lengthFunction(d);
            if (total + _len + currentDoc.length * separator.length >
                this.chunkSize) {
                if (total > this.chunkSize) {
                    console.warn(`Created a chunk of size ${total}, which is longer than the specified ${this.chunkSize}`);
                }
                if (currentDoc.length > 0) {
                    const doc = this.joinDocs(currentDoc, separator);
                    if (doc !== null) {
                        docs.push(doc);
                    }
                    // Keep on popping if:
                    // - we have a larger chunk than in the chunk overlap
                    // - or if we still have any chunks and the length is long
                    while (total > this.chunkOverlap ||
                        (total + _len + currentDoc.length * separator.length >
                            this.chunkSize &&
                            total > 0)) {
                        total -= await this.lengthFunction(currentDoc[0]);
                        currentDoc.shift();
                    }
                }
            }
            currentDoc.push(d);
            total += _len;
        }
        // Flush whatever is left as the final chunk.
        const doc = this.joinDocs(currentDoc, separator);
        if (doc !== null) {
            docs.push(doc);
        }
        return docs;
    }
}
exports.TextSplitter = TextSplitter;
/**
 * Splitter that cuts text on one fixed separator string ("\n\n" unless
 * `fields.separator` says otherwise) and then merges the pieces into chunks.
 */
class CharacterTextSplitter extends TextSplitter {
    /** Name reported for this class. */
    static lc_name() {
        return "CharacterTextSplitter";
    }
    /**
     * @param {object} [fields] Base splitter settings plus an optional `separator`.
     */
    constructor(fields) {
        super(fields);
        Object.defineProperty(this, "separator", {
            value: fields?.separator ?? "\n\n",
            writable: true,
            enumerable: true,
            configurable: true,
        });
    }
    /**
     * Splits `text` on the configured separator and merges the pieces.
     *
     * @param {string} text
     * @returns {Promise<string[]>} The chunks.
     */
    async splitText(text) {
        const pieces = this.splitOnSeparator(text, this.separator);
        // Kept separators are already part of the pieces, so nothing is
        // re-inserted between them when merging.
        const joiner = this.keepSeparator ? "" : this.separator;
        return this.mergeSplits(pieces, joiner);
    }
}
exports.CharacterTextSplitter = CharacterTextSplitter;
// Language identifiers that RecursiveCharacterTextSplitter.getSeparatorsForLanguage
// has a separator list for; any other value makes that method throw.
exports.SupportedTextSplitterLanguages = [
    "cpp",
    "go",
    "java",
    "js",
    "php",
    "proto",
    "python",
    "rst",
    "ruby",
    "rust",
    "scala",
    "swift",
    "markdown",
    "latex",
    "html",
    "sol",
];
/**
 * Splitter that works through an ordered list of separators: text is cut on
 * the first separator that occurs in it, and any piece that is still too
 * long is cut again with the separators that come later in the list.
 */
class RecursiveCharacterTextSplitter extends TextSplitter {
    /** Name reported for this class. */
    static lc_name() {
        return "RecursiveCharacterTextSplitter";
    }
    /**
     * @param {object} [fields] Base splitter settings plus an optional
     *   `separators` list. Unlike the base class, `keepSeparator` defaults
     *   to `true` here.
     */
    constructor(fields) {
        super(fields);
        Object.defineProperty(this, "separators", {
            value: fields?.separators ?? ["\n\n", "\n", " ", ""],
            writable: true,
            enumerable: true,
            configurable: true,
        });
        this.keepSeparator = fields?.keepSeparator ?? true;
    }
    /**
     * Recursive worker behind `splitText`.
     *
     * @param {string} text Text to split.
     * @param {string[]} separators Candidate separators, most preferred first.
     * @returns {Promise<string[]>} The chunks for `text`.
     */
    async _splitText(text, separators) {
        const chunks = [];
        // Use the first separator that is "" or occurs in the text; when none
        // qualifies, fall back to the last entry of the list.
        const chosenAt = separators.findIndex((candidate) => candidate === "" || text.includes(candidate));
        let separator = separators[separators.length - 1];
        let remainingSeparators;
        if (chosenAt !== -1) {
            separator = separators[chosenAt];
            if (separator !== "") {
                // Only a real match leaves finer separators to recurse with.
                remainingSeparators = separators.slice(chosenAt + 1);
            }
        }
        const pieces = this.splitOnSeparator(text, separator);
        // Kept separators already live inside the pieces.
        const joiner = this.keepSeparator ? "" : separator;
        // Short pieces waiting to be merged together.
        let pending = [];
        for (const piece of pieces) {
            if ((await this.lengthFunction(piece)) < this.chunkSize) {
                pending.push(piece);
                continue;
            }
            // An oversized piece ends the current run: merge what was collected
            // first so the output stays in document order.
            if (pending.length) {
                chunks.push(...(await this.mergeSplits(pending, joiner)));
                pending = [];
            }
            if (remainingSeparators) {
                chunks.push(...(await this._splitText(piece, remainingSeparators)));
            }
            else {
                // Nothing finer to split on; emit the piece as it is.
                chunks.push(piece);
            }
        }
        if (pending.length) {
            chunks.push(...(await this.mergeSplits(pending, joiner)));
        }
        return chunks;
    }
    /**
     * Splits `text` using the configured separator list.
     *
     * @param {string} text
     * @returns {Promise<string[]>} The chunks.
     */
    async splitText(text) {
        return this._splitText(text, this.separators);
    }
    /**
     * Builds a splitter that uses the separator list of `language`.
     *
     * @param {string} language One of the supported language identifiers.
     * @param {object} [options] Other splitter settings; a `separators`
     *   entry in here is overridden.
     * @returns {RecursiveCharacterTextSplitter}
     * @throws {Error} When `language` is not supported.
     */
    static fromLanguage(language, options) {
        const settings = { ...options };
        settings.separators = RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language);
        return new RecursiveCharacterTextSplitter(settings);
    }
    /**
     * Returns a new separator list for `language`, most preferred separator
     * first.
     *
     * @param {string} language One of the supported language identifiers.
     * @returns {string[]}
     * @throws {Error} When `language` is not supported.
     */
    static getSeparatorsForLanguage(language) {
        // Tail shared by almost every list: blank lines, lines, words, characters.
        const genericFallbacks = ["\n\n", "\n", " ", ""];
        switch (language) {
            case "cpp":
                return [
                    // Class definitions
                    "\nclass ",
                    // Function definitions
                    "\nvoid ", "\nint ", "\nfloat ", "\ndouble ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "go":
                return [
                    // Function and other top-level definitions
                    "\nfunc ", "\nvar ", "\nconst ", "\ntype ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nswitch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "java":
                return [
                    // Class definitions
                    "\nclass ",
                    // Method definitions
                    "\npublic ", "\nprotected ", "\nprivate ", "\nstatic ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "js":
                return [
                    // Function, variable and class definitions
                    "\nfunction ", "\nconst ", "\nlet ", "\nvar ", "\nclass ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nswitch ", "\ncase ", "\ndefault ",
                    ...genericFallbacks,
                ];
            case "php":
                return [
                    // Function definitions
                    "\nfunction ",
                    // Class definitions
                    "\nclass ",
                    // Control flow statements
                    "\nif ", "\nforeach ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "proto":
                return [
                    // Message, service, enum and option definitions
                    "\nmessage ", "\nservice ", "\nenum ", "\noption ",
                    // Import statements and syntax declarations
                    "\nimport ", "\nsyntax ",
                    ...genericFallbacks,
                ];
            case "python":
                return [
                    // Class definitions first, then function definitions
                    "\nclass ", "\ndef ", "\n\tdef ",
                    ...genericFallbacks,
                ];
            case "rst":
                return [
                    // Section titles
                    "\n===\n", "\n---\n", "\n***\n",
                    // Directive markers
                    "\n.. ",
                    ...genericFallbacks,
                ];
            case "ruby":
                return [
                    // Method and class definitions
                    "\ndef ", "\nclass ",
                    // Control flow statements
                    "\nif ", "\nunless ", "\nwhile ", "\nfor ", "\ndo ", "\nbegin ", "\nrescue ",
                    ...genericFallbacks,
                ];
            case "rust":
                return [
                    // Function and binding definitions
                    "\nfn ", "\nconst ", "\nlet ",
                    // Control flow statements ("\nconst " appears a second time here,
                    // exactly as in the original list)
                    "\nif ", "\nwhile ", "\nfor ", "\nloop ", "\nmatch ", "\nconst ",
                    ...genericFallbacks,
                ];
            case "scala":
                return [
                    // Class and object definitions
                    "\nclass ", "\nobject ",
                    // Method and value definitions
                    "\ndef ", "\nval ", "\nvar ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\nmatch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "swift":
                return [
                    // Function definitions
                    "\nfunc ",
                    // Type definitions
                    "\nclass ", "\nstruct ", "\nenum ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\ndo ", "\nswitch ", "\ncase ",
                    ...genericFallbacks,
                ];
            case "markdown":
                return [
                    // Headings, starting at level 2. The underline-style heading
                    // syntax is not covered.
                    "\n## ", "\n### ", "\n#### ", "\n##### ", "\n###### ",
                    // End of a code block
                    "```\n\n",
                    // Horizontal rules written with exactly three characters; longer
                    // runs of *, - or _ are not covered.
                    "\n\n***\n\n", "\n\n---\n\n", "\n\n___\n\n",
                    ...genericFallbacks,
                ];
            case "latex":
                return [
                    // Sectioning commands
                    "\n\\chapter{", "\n\\section{", "\n\\subsection{", "\n\\subsubsection{",
                    // Environments
                    "\n\\begin{enumerate}",
                    "\n\\begin{itemize}",
                    "\n\\begin{description}",
                    "\n\\begin{list}",
                    "\n\\begin{quote}",
                    "\n\\begin{quotation}",
                    "\n\\begin{verse}",
                    "\n\\begin{verbatim}",
                    // Math environments
                    "\n\\begin{align}", "$$", "$",
                    ...genericFallbacks,
                ];
            case "html":
                return [
                    // Body-level tags
                    "<body>", "<div>", "<p>", "<br>", "<li>",
                    "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>",
                    "<span>", "<table>", "<tr>", "<td>", "<th>", "<ul>", "<ol>",
                    "<header>", "<footer>", "<nav>",
                    // Head-level tags
                    "<head>", "<style>", "<script>", "<meta>", "<title>",
                    // This list falls back to words and characters only
                    " ", "",
                ];
            case "sol":
                return [
                    // Compiler information
                    "\npragma ", "\nusing ",
                    // Contract-level definitions
                    "\ncontract ", "\ninterface ", "\nlibrary ",
                    // Member definitions
                    "\nconstructor ", "\ntype ", "\nfunction ", "\nevent ",
                    "\nmodifier ", "\nerror ", "\nstruct ", "\nenum ",
                    // Control flow statements
                    "\nif ", "\nfor ", "\nwhile ", "\ndo while ", "\nassembly ",
                    ...genericFallbacks,
                ];
            default:
                throw new Error(`Language ${language} is not supported.`);
        }
    }
}
exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;
/**
 * Splitter that works on tokens instead of characters: the text is encoded
 * with a tiktoken encoding, cut into windows of `chunkSize` tokens where
 * each window after the first starts `chunkOverlap` tokens early, and every
 * window is decoded back to text.
 */
class TokenTextSplitter extends TextSplitter {
    /** Name reported for this class. */
    static lc_name() {
        return "TokenTextSplitter";
    }
    /**
     * @param {object} [fields] Base splitter settings plus:
     * @param {string} [fields.encodingName="gpt2"] Encoding to load.
     * @param {*} [fields.allowedSpecial=[]] Passed through to `tokenizer.encode`.
     * @param {*} [fields.disallowedSpecial="all"] Passed through to `tokenizer.encode`.
     */
    constructor(fields) {
        super(fields);
        // Declare every instance field up front; all of them start undefined.
        for (const field of ["encodingName", "allowedSpecial", "disallowedSpecial", "tokenizer"]) {
            Object.defineProperty(this, field, {
                value: undefined,
                writable: true,
                enumerable: true,
                configurable: true,
            });
        }
        const { encodingName, allowedSpecial, disallowedSpecial } = fields ?? {};
        this.encodingName = encodingName ?? "gpt2";
        this.allowedSpecial = allowedSpecial ?? [];
        this.disallowedSpecial = disallowedSpecial ?? "all";
    }
    /**
     * Splits `text` into overlapping token windows and decodes each one.
     *
     * @param {string} text
     * @returns {Promise<string[]>} The decoded chunks (empty when the text
     *   encodes to no tokens).
     */
    async splitText(text) {
        // The tokenizer is loaded on first use and then kept on the instance.
        if (!this.tokenizer) {
            const { getEncoding } = tiktoken_1;
            this.tokenizer = await getEncoding(this.encodingName);
        }
        const tokenIds = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);
        const chunks = [];
        // End of the previous window (0 before the first one).
        let cursor = 0;
        while (cursor < tokenIds.length) {
            // Every window after the first reaches back by the overlap.
            const from = cursor > 0 ? cursor - this.chunkOverlap : cursor;
            const to = Math.min(from + this.chunkSize, tokenIds.length);
            chunks.push(this.tokenizer.decode(tokenIds.slice(from, to)));
            cursor = to;
        }
        return chunks;
    }
}
exports.TokenTextSplitter = TokenTextSplitter;
/**
 * Recursive splitter preconfigured with the "markdown" separator list. A
 * `separators` entry in `fields` is overridden; every other setting is
 * passed through unchanged.
 */
class MarkdownTextSplitter extends RecursiveCharacterTextSplitter {
    /**
     * @param {object} [fields] Splitter settings.
     */
    constructor(fields) {
        const settings = { ...fields };
        settings.separators = RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown");
        super(settings);
    }
}
exports.MarkdownTextSplitter = MarkdownTextSplitter;
/**
 * Recursive character splitter that always uses the separator list
 * returned by `getSeparatorsForLanguage("latex")`.
 */
class LatexTextSplitter extends RecursiveCharacterTextSplitter {
    /**
     * @param fields Optional splitter options forwarded to the parent
     * constructor. A `separators` entry, if present, is replaced by the
     * LaTeX separator list.
     */
    constructor(fields) {
        const options = {
            ...fields,
            separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),
        };
        super(options);
    }
}
exports.LatexTextSplitter = LatexTextSplitter;
update readme+reqs 2024-10-02 15:15:21 -05:00			`"use strict";`
			`Object.defineProperty(exports, "__esModule", { value: true });`
			`exports.LatexTextSplitter = exports.MarkdownTextSplitter = exports.TokenTextSplitter = exports.RecursiveCharacterTextSplitter = exports.SupportedTextSplitterLanguages = exports.CharacterTextSplitter = exports.TextSplitter = void 0;`
			`const documents_1 = require("@langchain/core/documents");`
			`const tiktoken_1 = require("@langchain/core/utils/tiktoken");`
			`class TextSplitter extends documents_1.BaseDocumentTransformer {`
			`constructor(fields) {`
			`super(fields);`
			`Object.defineProperty(this, "lc_namespace", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: ["langchain", "document_transformers", "text_splitters"]`
			`});`
			`Object.defineProperty(this, "chunkSize", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: 1000`
			`});`
			`Object.defineProperty(this, "chunkOverlap", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: 200`
			`});`
			`Object.defineProperty(this, "keepSeparator", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: false`
			`});`
			`Object.defineProperty(this, "lengthFunction", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: void 0`
			`});`
			`this.chunkSize = fields?.chunkSize ?? this.chunkSize;`
			`this.chunkOverlap = fields?.chunkOverlap ?? this.chunkOverlap;`
			`this.keepSeparator = fields?.keepSeparator ?? this.keepSeparator;`
			`this.lengthFunction =`
			`fields?.lengthFunction ?? ((text) => text.length);`
			`if (this.chunkOverlap >= this.chunkSize) {`
			`throw new Error("Cannot have chunkOverlap >= chunkSize");`
			`}`
			`}`
			`async transformDocuments(documents, chunkHeaderOptions = {}) {`
			`return this.splitDocuments(documents, chunkHeaderOptions);`
			`}`
			`splitOnSeparator(text, separator) {`
			`let splits;`
			`if (separator) {`
			`if (this.keepSeparator) {`
			`const regexEscapedSeparator = separator.replace(/[/\-\\^$*+?.()\|[\]{}]/g, "\\$&");`
			splits = text.split(new RegExp(`(?=${regexEscapedSeparator})`));
			`}`
			`else {`
			`splits = text.split(separator);`
			`}`
			`}`
			`else {`
			`splits = text.split("");`
			`}`
			`return splits.filter((s) => s !== "");`
			`}`
			`async createDocuments(texts,`
			`// eslint-disable-next-line @typescript-eslint/no-explicit-any`
			`metadatas = [], chunkHeaderOptions = {}) {`
			`// if no metadata is provided, we create an empty one for each text`
			`// eslint-disable-next-line @typescript-eslint/no-explicit-any`
			`const _metadatas = metadatas.length > 0`
			`? metadatas`
			`: [...Array(texts.length)].map(() => ({}));`
			`const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;`
			`const documents = new Array();`
			`for (let i = 0; i < texts.length; i += 1) {`
			`const text = texts[i];`
			`let lineCounterIndex = 1;`
			`let prevChunk = null;`
			`let indexPrevChunk = -1;`
			`for (const chunk of await this.splitText(text)) {`
			`let pageContent = chunkHeader;`
			`// we need to count the \n that are in the text before getting removed by the splitting`
			`const indexChunk = text.indexOf(chunk, indexPrevChunk + 1);`
			`if (prevChunk === null) {`
			`const newLinesBeforeFirstChunk = this.numberOfNewLines(text, 0, indexChunk);`
			`lineCounterIndex += newLinesBeforeFirstChunk;`
			`}`
			`else {`
			`const indexEndPrevChunk = indexPrevChunk + (await this.lengthFunction(prevChunk));`
			`if (indexEndPrevChunk < indexChunk) {`
			`const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexEndPrevChunk, indexChunk);`
			`lineCounterIndex += numberOfIntermediateNewLines;`
			`}`
			`else if (indexEndPrevChunk > indexChunk) {`
			`const numberOfIntermediateNewLines = this.numberOfNewLines(text, indexChunk, indexEndPrevChunk);`
			`lineCounterIndex -= numberOfIntermediateNewLines;`
			`}`
			`if (appendChunkOverlapHeader) {`
			`pageContent += chunkOverlapHeader;`
			`}`
			`}`
			`const newLinesCount = this.numberOfNewLines(chunk);`
			`const loc = _metadatas[i].loc && typeof _metadatas[i].loc === "object"`
			`? { ..._metadatas[i].loc }`
			`: {};`
			`loc.lines = {`
			`from: lineCounterIndex,`
			`to: lineCounterIndex + newLinesCount,`
			`};`
			`const metadataWithLinesNumber = {`
			`..._metadatas[i],`
			`loc,`
			`};`
			`pageContent += chunk;`
			`documents.push(new documents_1.Document({`
			`pageContent,`
			`metadata: metadataWithLinesNumber,`
			`}));`
			`lineCounterIndex += newLinesCount;`
			`prevChunk = chunk;`
			`indexPrevChunk = indexChunk;`
			`}`
			`}`
			`return documents;`
			`}`
			`numberOfNewLines(text, start, end) {`
			`const textSection = text.slice(start, end);`
			`return (textSection.match(/\n/g) \|\| []).length;`
			`}`
			`async splitDocuments(documents, chunkHeaderOptions = {}) {`
			`const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);`
			`const texts = selectedDocuments.map((doc) => doc.pageContent);`
			`const metadatas = selectedDocuments.map((doc) => doc.metadata);`
			`return this.createDocuments(texts, metadatas, chunkHeaderOptions);`
			`}`
			`joinDocs(docs, separator) {`
			`const text = docs.join(separator).trim();`
			`return text === "" ? null : text;`
			`}`
			`async mergeSplits(splits, separator) {`
			`const docs = [];`
			`const currentDoc = [];`
			`let total = 0;`
			`for (const d of splits) {`
			`const _len = await this.lengthFunction(d);`
			`if (total + _len + currentDoc.length * separator.length >`
			`this.chunkSize) {`
			`if (total > this.chunkSize) {`
			console.warn(`Created a chunk of size ${total}, +
			which is longer than the specified ${this.chunkSize}`);
			`}`
			`if (currentDoc.length > 0) {`
			`const doc = this.joinDocs(currentDoc, separator);`
			`if (doc !== null) {`
			`docs.push(doc);`
			`}`
			`// Keep on popping if:`
			`// - we have a larger chunk than in the chunk overlap`
			`// - or if we still have any chunks and the length is long`
			`while (total > this.chunkOverlap \|\|`
			`(total + _len + currentDoc.length * separator.length >`
			`this.chunkSize &&`
			`total > 0)) {`
			`total -= await this.lengthFunction(currentDoc[0]);`
			`currentDoc.shift();`
			`}`
			`}`
			`}`
			`currentDoc.push(d);`
			`total += _len;`
			`}`
			`const doc = this.joinDocs(currentDoc, separator);`
			`if (doc !== null) {`
			`docs.push(doc);`
			`}`
			`return docs;`
			`}`
			`}`
			`exports.TextSplitter = TextSplitter;`
			`class CharacterTextSplitter extends TextSplitter {`
			`static lc_name() {`
			`return "CharacterTextSplitter";`
			`}`
			`constructor(fields) {`
			`super(fields);`
			`Object.defineProperty(this, "separator", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: "\n\n"`
			`});`
			`this.separator = fields?.separator ?? this.separator;`
			`}`
			`async splitText(text) {`
			`// First we naively split the large input into a bunch of smaller ones.`
			`const splits = this.splitOnSeparator(text, this.separator);`
			`return this.mergeSplits(splits, this.keepSeparator ? "" : this.separator);`
			`}`
			`}`
			`exports.CharacterTextSplitter = CharacterTextSplitter;`
			`exports.SupportedTextSplitterLanguages = [`
			`"cpp",`
			`"go",`
			`"java",`
			`"js",`
			`"php",`
			`"proto",`
			`"python",`
			`"rst",`
			`"ruby",`
			`"rust",`
			`"scala",`
			`"swift",`
			`"markdown",`
			`"latex",`
			`"html",`
			`"sol",`
			`];`
			`class RecursiveCharacterTextSplitter extends TextSplitter {`
			`static lc_name() {`
			`return "RecursiveCharacterTextSplitter";`
			`}`
			`constructor(fields) {`
			`super(fields);`
			`Object.defineProperty(this, "separators", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: ["\n\n", "\n", " ", ""]`
			`});`
			`this.separators = fields?.separators ?? this.separators;`
			`this.keepSeparator = fields?.keepSeparator ?? true;`
			`}`
			`async _splitText(text, separators) {`
			`const finalChunks = [];`
			`// Get appropriate separator to use`
			`let separator = separators[separators.length - 1];`
			`let newSeparators;`
			`for (let i = 0; i < separators.length; i += 1) {`
			`const s = separators[i];`
			`if (s === "") {`
			`separator = s;`
			`break;`
			`}`
			`if (text.includes(s)) {`
			`separator = s;`
			`newSeparators = separators.slice(i + 1);`
			`break;`
			`}`
			`}`
			`// Now that we have the separator, split the text`
			`const splits = this.splitOnSeparator(text, separator);`
			`// Now go merging things, recursively splitting longer texts.`
			`let goodSplits = [];`
			`const _separator = this.keepSeparator ? "" : separator;`
			`for (const s of splits) {`
			`if ((await this.lengthFunction(s)) < this.chunkSize) {`
			`goodSplits.push(s);`
			`}`
			`else {`
			`if (goodSplits.length) {`
			`const mergedText = await this.mergeSplits(goodSplits, _separator);`
			`finalChunks.push(...mergedText);`
			`goodSplits = [];`
			`}`
			`if (!newSeparators) {`
			`finalChunks.push(s);`
			`}`
			`else {`
			`const otherInfo = await this._splitText(s, newSeparators);`
			`finalChunks.push(...otherInfo);`
			`}`
			`}`
			`}`
			`if (goodSplits.length) {`
			`const mergedText = await this.mergeSplits(goodSplits, _separator);`
			`finalChunks.push(...mergedText);`
			`}`
			`return finalChunks;`
			`}`
			`async splitText(text) {`
			`return this._splitText(text, this.separators);`
			`}`
			`static fromLanguage(language, options) {`
			`return new RecursiveCharacterTextSplitter({`
			`...options,`
			`separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage(language),`
			`});`
			`}`
			`static getSeparatorsForLanguage(language) {`
			`if (language === "cpp") {`
			`return [`
			`// Split along class definitions`
			`"\nclass ",`
			`// Split along function definitions`
			`"\nvoid ",`
			`"\nint ",`
			`"\nfloat ",`
			`"\ndouble ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\nswitch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "go") {`
			`return [`
			`// Split along function definitions`
			`"\nfunc ",`
			`"\nvar ",`
			`"\nconst ",`
			`"\ntype ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nswitch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "java") {`
			`return [`
			`// Split along class definitions`
			`"\nclass ",`
			`// Split along method definitions`
			`"\npublic ",`
			`"\nprotected ",`
			`"\nprivate ",`
			`"\nstatic ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\nswitch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "js") {`
			`return [`
			`// Split along function definitions`
			`"\nfunction ",`
			`"\nconst ",`
			`"\nlet ",`
			`"\nvar ",`
			`"\nclass ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\nswitch ",`
			`"\ncase ",`
			`"\ndefault ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "php") {`
			`return [`
			`// Split along function definitions`
			`"\nfunction ",`
			`// Split along class definitions`
			`"\nclass ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nforeach ",`
			`"\nwhile ",`
			`"\ndo ",`
			`"\nswitch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "proto") {`
			`return [`
			`// Split along message definitions`
			`"\nmessage ",`
			`// Split along service definitions`
			`"\nservice ",`
			`// Split along enum definitions`
			`"\nenum ",`
			`// Split along option definitions`
			`"\noption ",`
			`// Split along import statements`
			`"\nimport ",`
			`// Split along syntax declarations`
			`"\nsyntax ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "python") {`
			`return [`
			`// First, try to split along class definitions`
			`"\nclass ",`
			`"\ndef ",`
			`"\n\tdef ",`
			`// Now split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "rst") {`
			`return [`
			`// Split along section titles`
			`"\n===\n",`
			`"\n---\n",`
			`"\n***\n",`
			`// Split along directive markers`
			`"\n.. ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "ruby") {`
			`return [`
			`// Split along method definitions`
			`"\ndef ",`
			`"\nclass ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nunless ",`
			`"\nwhile ",`
			`"\nfor ",`
			`"\ndo ",`
			`"\nbegin ",`
			`"\nrescue ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "rust") {`
			`return [`
			`// Split along function definitions`
			`"\nfn ",`
			`"\nconst ",`
			`"\nlet ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nwhile ",`
			`"\nfor ",`
			`"\nloop ",`
			`"\nmatch ",`
			`"\nconst ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "scala") {`
			`return [`
			`// Split along class definitions`
			`"\nclass ",`
			`"\nobject ",`
			`// Split along method definitions`
			`"\ndef ",`
			`"\nval ",`
			`"\nvar ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\nmatch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "swift") {`
			`return [`
			`// Split along function definitions`
			`"\nfunc ",`
			`// Split along class definitions`
			`"\nclass ",`
			`"\nstruct ",`
			`"\nenum ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\ndo ",`
			`"\nswitch ",`
			`"\ncase ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "markdown") {`
			`return [`
			`// First, try to split along Markdown headings (starting with level 2)`
			`"\n## ",`
			`"\n### ",`
			`"\n#### ",`
			`"\n##### ",`
			`"\n###### ",`
			`// Note the alternative syntax for headings (below) is not handled here`
			`// Heading level 2`
			`// ---------------`
			`// End of code block`
			"```\n\n",
			`// Horizontal lines`
			`"\n\n***\n\n",`
			`"\n\n---\n\n",`
			`"\n\n___\n\n",`
			`// Note that this splitter doesn't handle horizontal lines defined`
			`// by three or more of ***, ---, or ___, but this is not handled`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "latex") {`
			`return [`
			`// First, try to split along Latex sections`
			`"\n\\chapter{",`
			`"\n\\section{",`
			`"\n\\subsection{",`
			`"\n\\subsubsection{",`
			`// Now split by environments`
			`"\n\\begin{enumerate}",`
			`"\n\\begin{itemize}",`
			`"\n\\begin{description}",`
			`"\n\\begin{list}",`
			`"\n\\begin{quote}",`
			`"\n\\begin{quotation}",`
			`"\n\\begin{verse}",`
			`"\n\\begin{verbatim}",`
			`// Now split by math environments`
			`"\n\\begin{align}",`
			`"$$",`
			`"$",`
			`// Now split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "html") {`
			`return [`
			`// First, try to split along HTML tags`
			`"<body>",`
			`"<div>",`
			`"<p>",`
			`"<br>",`
			`"<li>",`
			`"<h1>",`
			`"<h2>",`
			`"<h3>",`
			`"<h4>",`
			`"<h5>",`
			`"<h6>",`
			`"<span>",`
			`"<table>",`
			`"<tr>",`
			`"<td>",`
			`"<th>",`
			`"<ul>",`
			`"<ol>",`
			`"<header>",`
			`"<footer>",`
			`"<nav>",`
			`// Head`
			`"<head>",`
			`"<style>",`
			`"<script>",`
			`"<meta>",`
			`"<title>",`
			`// Normal type of lines`
			`" ",`
			`"",`
			`];`
			`}`
			`else if (language === "sol") {`
			`return [`
			`// Split along compiler informations definitions`
			`"\npragma ",`
			`"\nusing ",`
			`// Split along contract definitions`
			`"\ncontract ",`
			`"\ninterface ",`
			`"\nlibrary ",`
			`// Split along method definitions`
			`"\nconstructor ",`
			`"\ntype ",`
			`"\nfunction ",`
			`"\nevent ",`
			`"\nmodifier ",`
			`"\nerror ",`
			`"\nstruct ",`
			`"\nenum ",`
			`// Split along control flow statements`
			`"\nif ",`
			`"\nfor ",`
			`"\nwhile ",`
			`"\ndo while ",`
			`"\nassembly ",`
			`// Split by the normal type of lines`
			`"\n\n",`
			`"\n",`
			`" ",`
			`"",`
			`];`
			`}`
			`else {`
			throw new Error(`Language ${language} is not supported.`);
			`}`
			`}`
			`}`
			`exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;`
			`/**`
			`* Implementation of splitter which looks at tokens.`
			`*/`
			`class TokenTextSplitter extends TextSplitter {`
			`static lc_name() {`
			`return "TokenTextSplitter";`
			`}`
			`constructor(fields) {`
			`super(fields);`
			`Object.defineProperty(this, "encodingName", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: void 0`
			`});`
			`Object.defineProperty(this, "allowedSpecial", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: void 0`
			`});`
			`Object.defineProperty(this, "disallowedSpecial", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: void 0`
			`});`
			`Object.defineProperty(this, "tokenizer", {`
			`enumerable: true,`
			`configurable: true,`
			`writable: true,`
			`value: void 0`
			`});`
			`this.encodingName = fields?.encodingName ?? "gpt2";`
			`this.allowedSpecial = fields?.allowedSpecial ?? [];`
			`this.disallowedSpecial = fields?.disallowedSpecial ?? "all";`
			`}`
			`async splitText(text) {`
			`if (!this.tokenizer) {`
			`this.tokenizer = await (0, tiktoken_1.getEncoding)(this.encodingName);`
			`}`
			`const splits = [];`
			`const input_ids = this.tokenizer.encode(text, this.allowedSpecial, this.disallowedSpecial);`
			`let start_idx = 0;`
			`while (start_idx < input_ids.length) {`
			`if (start_idx > 0) {`
			`start_idx -= this.chunkOverlap;`
			`}`
			`const end_idx = Math.min(start_idx + this.chunkSize, input_ids.length);`
			`const chunk_ids = input_ids.slice(start_idx, end_idx);`
			`splits.push(this.tokenizer.decode(chunk_ids));`
			`start_idx = end_idx;`
			`}`
			`return splits;`
			`}`
			`}`
			`exports.TokenTextSplitter = TokenTextSplitter;`
			`class MarkdownTextSplitter extends RecursiveCharacterTextSplitter {`
			`constructor(fields) {`
			`super({`
			`...fields,`
			`separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("markdown"),`
			`});`
			`}`
			`}`
			`exports.MarkdownTextSplitter = MarkdownTextSplitter;`
			`class LatexTextSplitter extends RecursiveCharacterTextSplitter {`
			`constructor(fields) {`
			`super({`
			`...fields,`
			`separators: RecursiveCharacterTextSplitter.getSeparatorsForLanguage("latex"),`
			`});`
			`}`
			`}`
			`exports.LatexTextSplitter = LatexTextSplitter;`