// NOTE(review): this file originally began with three bare "///" markers whose
// contents were lost. They were presumably triple-slash reference directives
// (e.g. a types="node" reference, since `Buffer` and `node:*` modules are used
// below) — confirm against the build output and restore if required.
import type { basename as BasenameT } from "node:path";
import type { readFile as ReadFileT } from "node:fs/promises";
import { Document } from "@langchain/core/documents";
import { StringWithAutocomplete } from "@langchain/core/utils/types";
import { DirectoryLoader, UnknownHandling } from "langchain/document_loaders/fs/directory";
import { BaseDocumentLoader } from "@langchain/core/document_loaders/base";

/**
 * File-type strings associated with the Unstructured API. The values are
 * defined in the implementation, not in this declaration; presumably these
 * are the file types UnstructuredDirectoryLoader treats as supported.
 */
export declare const UNSTRUCTURED_API_FILETYPES: string[];

/**
 * Represents an element returned by the Unstructured API. It has
 * properties for the element type, text content, and metadata.
 */
type Element = {
    type: string;
    text: string;
    metadata: {
        [key: string]: unknown;
    };
};

/**
 * Represents the available strategies for the UnstructuredLoader. It can
 * be one of "hi_res", "fast", "ocr_only", or "auto".
 */
export type UnstructuredLoaderStrategy = "hi_res" | "fast" | "ocr_only" | "auto";

/**
 * Represents the available hi-res models for the UnstructuredLoader.
 * "chipper" is currently the only listed value.
 */
export type HiResModelName = "chipper";

/**
 * To enable or disable table extraction for file types other than PDF, set
 * the skipInferTableTypes property in the UnstructuredLoaderOptions object.
 * The skipInferTableTypes property is an array of file types for which table
 * extraction is disabled. For example, to disable table extraction for .docx
 * and .doc files, set the skipInferTableTypes property to ["docx", "doc"].
 * You can also disable table extraction for all file types other than PDF by
 * setting the skipInferTableTypes property to [].
 *
 * NOTE(review): the last sentence appears to contradict the definition above.
 * If the array lists the types for which extraction is *disabled*, an empty
 * array would disable it for none of them. Confirm against the Unstructured
 * API documentation for `skip_infer_table_types`.
 */
export type SkipInferTableTypes =
    | "txt"
    | "text"
    | "pdf"
    | "docx"
    | "doc"
    | "jpg"
    | "jpeg"
    | "eml"
    | "html"
    | "htm"
    | "md"
    | "pptx"
    | "ppt"
    | "msg"
    | "rtf"
    | "xlsx"
    | "xls"
    | "odt"
    | "epub";

/**
 * Set the chunking_strategy to chunk text into larger or smaller elements.
 * Defaults to None with optional arg of by_title
 */
export type ChunkingStrategy = "None" | "by_title";

/**
 * Options accepted by UnstructuredLoader. Every field is optional.
 *
 * Fields typed with StringWithAutocomplete carry the corresponding literal
 * union declared in this file (UnstructuredLoaderStrategy,
 * SkipInferTableTypes, HiResModelName, ChunkingStrategy) as their type
 * argument.
 */
export type UnstructuredLoaderOptions = {
    apiKey?: string;
    apiUrl?: string;
    strategy?: StringWithAutocomplete<UnstructuredLoaderStrategy>;
    encoding?: string;
    ocrLanguages?: Array<string>;
    coordinates?: boolean;
    pdfInferTableStructure?: boolean;
    xmlKeepTags?: boolean;
    skipInferTableTypes?: Array<StringWithAutocomplete<SkipInferTableTypes>>;
    hiResModelName?: StringWithAutocomplete<HiResModelName>;
    includePageBreaks?: boolean;
    chunkingStrategy?: StringWithAutocomplete<ChunkingStrategy>;
    multiPageSections?: boolean;
    combineUnderNChars?: number;
    newAfterNChars?: number;
    maxCharacters?: number;
    extractImageBlockTypes?: string[];
    overlap?: number;
    overlapAll?: boolean;
};

/**
 * Options accepted by UnstructuredDirectoryLoader: every
 * UnstructuredLoaderOptions field plus `recursive` and `unknown`.
 * Presumably the two extra fields are forwarded to DirectoryLoader — verify
 * against the implementation.
 */
export type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
    recursive?: boolean;
    unknown?: UnknownHandling;
};

/**
 * In-memory input for UnstructuredLoader: the file contents as a buffer
 * together with the file name to associate with them.
 */
export type UnstructuredMemoryLoaderOptions = {
    buffer: Buffer;
    fileName: string;
};

/**
 * A document loader that uses the Unstructured API to load unstructured
 * documents. It supports both the new syntax with options object and the
 * legacy syntax for backward compatibility. The load() method sends a
 * partitioning request to the Unstructured API and retrieves the
 * partitioned elements. It creates a Document instance for each element
 * and returns an array of Document instances.
 *
 * It accepts either a filepath or an object containing a buffer and a filename
 * as input.
 */
export declare class UnstructuredLoader extends BaseDocumentLoader {
    filePath: string;
    private buffer?;
    private fileName?;
    private apiUrl;
    private apiKey?;
    private strategy;
    private encoding?;
    private ocrLanguages;
    private coordinates?;
    private pdfInferTableStructure?;
    private xmlKeepTags?;
    private skipInferTableTypes?;
    private hiResModelName?;
    private includePageBreaks?;
    private chunkingStrategy?;
    private multiPageSections?;
    private combineUnderNChars?;
    private newAfterNChars?;
    private maxCharacters?;
    private extractImageBlockTypes?;
    private overlap?;
    private overlapAll?;
    /**
     * @param filepathOrBufferOptions - Either a path to the file to load, or
     *   an in-memory buffer with its file name.
     * @param unstructuredOptions - Loader options object. A plain string is
     *   also accepted; presumably this is the legacy-syntax form — verify its
     *   meaning against the implementation.
     */
    constructor(
        filepathOrBufferOptions: string | UnstructuredMemoryLoaderOptions,
        unstructuredOptions?: UnstructuredLoaderOptions | string
    );
    /**
     * Resolves to the elements produced for the input. Presumably this is
     * the step that issues the partitioning request used by load().
     */
    _partition(): Promise<Element[]>;
    /**
     * Resolves to an array of Document instances, one per element.
     */
    load(): Promise<Document[]>;
    /**
     * Resolves to the `readFile` and `basename` functions, typed after the
     * Node.js `node:fs/promises` and `node:path` exports.
     */
    imports(): Promise<{
        readFile: typeof ReadFileT;
        basename: typeof BasenameT;
    }>;
}

/**
 * A document loader that loads unstructured documents from a directory
 * using the UnstructuredLoader. It creates a UnstructuredLoader instance
 * for each supported file type and passes it to the DirectoryLoader
 * constructor.
 * @example
 * ```typescript
 * const loader = new UnstructuredDirectoryLoader("path/to/directory", {
 *   apiKey: "MY_API_KEY",
 * });
 * const docs = await loader.load();
 * ```
 */
export declare class UnstructuredDirectoryLoader extends DirectoryLoader {
    /**
     * @param directoryPathOrLegacyApiUrl - Directory path (new syntax) or API
     *   URL (legacy syntax), as the parameter name indicates.
     * @param optionsOrLegacyDirectoryPath - Options object (new syntax) or
     *   directory path (legacy syntax).
     * @param legacyOptionRecursive - Legacy-syntax `recursive` flag.
     * @param legacyOptionUnknown - Legacy-syntax handling for unknown files.
     */
    constructor(
        directoryPathOrLegacyApiUrl: string,
        optionsOrLegacyDirectoryPath: UnstructuredDirectoryLoaderOptions | string,
        legacyOptionRecursive?: boolean,
        legacyOptionUnknown?: UnknownHandling
    );
}

export { UnknownHandling };