import type * as tiktoken from "js-tiktoken";
import { Document, BaseDocumentTransformer } from "@langchain/core/documents";

/**
 * Options shared by every text splitter declared in this module.
 */
export interface TextSplitterParams {
  /** Configured chunk size (presumably measured with `lengthFunction`; the implementation is not visible here). */
  chunkSize: number;
  /** Configured overlap between chunks (presumably in the same unit as `chunkSize`; confirm against the implementation). */
  chunkOverlap: number;
  /** Flag controlling whether separators are kept (exact placement is decided by the implementation). */
  keepSeparator: boolean;
  /** Optional length measure for a piece of text; may be synchronous or asynchronous. */
  lengthFunction?:
    | ((text: string) => number)
    | ((text: string) => Promise<number>);
}

/**
 * Optional header settings accepted by the document-producing methods of
 * {@link TextSplitter}.
 */
export type TextSplitterChunkHeaderOptions = {
  chunkHeader?: string;
  chunkOverlapHeader?: string;
  appendChunkOverlapHeader?: boolean;
};

/**
 * Abstract base class for text splitters. Concrete subclasses must provide
 * {@link TextSplitter.splitText}.
 */
export declare abstract class TextSplitter
  extends BaseDocumentTransformer
  implements TextSplitterParams
{
  lc_namespace: string[];
  chunkSize: number;
  chunkOverlap: number;
  keepSeparator: boolean;
  /** Length measure for a piece of text; may be synchronous or asynchronous. */
  lengthFunction:
    | ((text: string) => number)
    | ((text: string) => Promise<number>);
  /**
   * @param fields Any subset of {@link TextSplitterParams}.
   */
  constructor(fields?: Partial<TextSplitterParams>);
  /**
   * @param documents Documents to transform.
   * @param chunkHeaderOptions Optional header settings.
   * @returns A promise resolving to the resulting documents.
   */
  transformDocuments(
    documents: Document[],
    chunkHeaderOptions?: TextSplitterChunkHeaderOptions
  ): Promise<Document[]>;
  /**
   * Splits a single text into pieces; implemented by each subclass.
   * @param text Text to split.
   * @returns A promise resolving to the pieces.
   */
  abstract splitText(text: string): Promise<string[]>;
  protected splitOnSeparator(text: string, separator: string): string[];
  /**
   * @param texts Input texts.
   * @param metadatas Optional metadata records (presumably one per text; confirm against the implementation).
   * @param chunkHeaderOptions Optional header settings.
   * @returns A promise resolving to the created documents.
   */
  createDocuments(
    texts: string[],
    // Metadata values are intentionally unconstrained.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    metadatas?: Record<string, any>[],
    chunkHeaderOptions?: TextSplitterChunkHeaderOptions
  ): Promise<Document[]>;
  private numberOfNewLines;
  /**
   * @param documents Documents to split.
   * @param chunkHeaderOptions Optional header settings.
   * @returns A promise resolving to the resulting documents.
   */
  splitDocuments(
    documents: Document[],
    chunkHeaderOptions?: TextSplitterChunkHeaderOptions
  ): Promise<Document[]>;
  private joinDocs;
  /**
   * @param splits Pieces of text to merge.
   * @param separator Separator string passed to the merge step.
   * @returns A promise resolving to the merged strings.
   */
  mergeSplits(splits: string[], separator: string): Promise<string[]>;
}

/**
 * Options for {@link CharacterTextSplitter}: the shared options plus a single
 * separator string.
 */
export interface CharacterTextSplitterParams extends TextSplitterParams {
  separator: string;
}

/**
 * Text splitter configured with a single `separator` string.
 */
export declare class CharacterTextSplitter
  extends TextSplitter
  implements CharacterTextSplitterParams
{
  static lc_name(): string;
  separator: string;
  /**
   * @param fields Any subset of {@link CharacterTextSplitterParams}.
   */
  constructor(fields?: Partial<CharacterTextSplitterParams>);
  splitText(text: string): Promise<string[]>;
}

/**
 * Options for {@link RecursiveCharacterTextSplitter}: the shared options plus
 * a list of separator strings.
 */
export interface RecursiveCharacterTextSplitterParams
  extends TextSplitterParams {
  separators: string[];
}

/**
 * Language identifiers accepted by
 * {@link RecursiveCharacterTextSplitter.fromLanguage} and
 * {@link RecursiveCharacterTextSplitter.getSeparatorsForLanguage}.
 */
export declare const SupportedTextSplitterLanguages: readonly [
  "cpp",
  "go",
  "java",
  "js",
  "php",
  "proto",
  "python",
  "rst",
  "ruby",
  "rust",
  "scala",
  "swift",
  "markdown",
  "latex",
  "html",
  "sol"
];

/** Union of the string literals listed in {@link SupportedTextSplitterLanguages}. */
export type SupportedTextSplitterLanguage =
  (typeof SupportedTextSplitterLanguages)[number];

/**
 * Text splitter configured with a list of `separators`.
 */
export declare class RecursiveCharacterTextSplitter
  extends TextSplitter
  implements RecursiveCharacterTextSplitterParams
{
  static lc_name(): string;
  separators: string[];
  /**
   * @param fields Any subset of {@link RecursiveCharacterTextSplitterParams}.
   */
  constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>);
  private _splitText;
  splitText(text: string): Promise<string[]>;
  /**
   * Creates a splitter for one of the supported languages.
   * @param language One of {@link SupportedTextSplitterLanguages}.
   * @param options Optional subset of {@link RecursiveCharacterTextSplitterParams}.
   */
  static fromLanguage(
    language: SupportedTextSplitterLanguage,
    options?: Partial<RecursiveCharacterTextSplitterParams>
  ): RecursiveCharacterTextSplitter;
  /**
   * @param language One of {@link SupportedTextSplitterLanguages}.
   * @returns The separator strings for that language.
   */
  static getSeparatorsForLanguage(
    language: SupportedTextSplitterLanguage
  ): string[];
}

/**
 * Options for {@link TokenTextSplitter}: the shared options plus the tiktoken
 * encoding name and special-token settings.
 */
export interface TokenTextSplitterParams extends TextSplitterParams {
  encodingName: tiktoken.TiktokenEncoding;
  /** Either the literal `"all"` or an explicit list of strings. */
  allowedSpecial: "all" | Array<string>;
  /** Either the literal `"all"` or an explicit list of strings. */
  disallowedSpecial: "all" | Array<string>;
}

/**
 * Implementation of splitter which looks at tokens.
 */
export declare class TokenTextSplitter
  extends TextSplitter
  implements TokenTextSplitterParams
{
  static lc_name(): string;
  encodingName: tiktoken.TiktokenEncoding;
  allowedSpecial: "all" | Array<string>;
  disallowedSpecial: "all" | Array<string>;
  private tokenizer;
  /**
   * @param fields Any subset of {@link TokenTextSplitterParams}.
   */
  constructor(fields?: Partial<TokenTextSplitterParams>);
  splitText(text: string): Promise<string[]>;
}

/** Options for {@link MarkdownTextSplitter}; identical to {@link TextSplitterParams}. */
export type MarkdownTextSplitterParams = TextSplitterParams;

/**
 * A {@link RecursiveCharacterTextSplitter} subclass for Markdown; it takes
 * only the shared options.
 */
export declare class MarkdownTextSplitter
  extends RecursiveCharacterTextSplitter
  implements MarkdownTextSplitterParams
{
  /**
   * @param fields Any subset of {@link MarkdownTextSplitterParams}.
   */
  constructor(fields?: Partial<MarkdownTextSplitterParams>);
}

/** Options for {@link LatexTextSplitter}; identical to {@link TextSplitterParams}. */
export type LatexTextSplitterParams = TextSplitterParams;

/**
 * A {@link RecursiveCharacterTextSplitter} subclass for LaTeX; it takes only
 * the shared options.
 */
export declare class LatexTextSplitter
  extends RecursiveCharacterTextSplitter
  implements LatexTextSplitterParams
{
  /**
   * @param fields Any subset of {@link LatexTextSplitterParams}.
   */
  constructor(fields?: Partial<LatexTextSplitterParams>);
}