"""Command-line helper for pre-downloading the model artifacts used by Docling."""

import logging
import warnings
from enum import Enum
from pathlib import Path
from typing import Annotated, Optional

import typer
from rich.console import Console
from rich.logging import RichHandler

from docling.datamodel.settings import settings
from docling.utils.model_downloader import download_models

# Hide UserWarning / FutureWarning noise raised from these third-party modules
# so it does not clutter the CLI output.
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")

console = Console()
err_console = Console(stderr=True)


app = typer.Typer(
    name="Docling models helper",
    no_args_is_help=True,
    add_completion=False,
    pretty_exceptions_enable=False,
)


class _AvailableModels(str, Enum):
    """Model identifiers accepted as positional arguments of ``download``."""

    LAYOUT = "layout"
    TABLEFORMER = "tableformer"
    CODE_FORMULA = "code_formula"
    PICTURE_CLASSIFIER = "picture_classifier"
    EASYOCR = "easyocr"


@app.command("download")
def download(
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where all the models are downloaded.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced")
    ] = False,
    models: Annotated[
        Optional[list[_AvailableModels]],
        typer.Argument(
            help="Models to download (default behavior: all will be downloaded)",
        ),
    ] = None,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help=(
                "No extra output is generated, the CLI prints only the directory "
                "with the cached models."
            ),
        ),
    ] = False,
) -> None:
    """Download the selected Docling models into a local directory.

    When no model is named, every available model is downloaded. With --quiet
    only the resulting models directory is printed.
    """
    if not quiet:
        # Route log records through Rich so download messages are rendered in
        # blue; level and timestamp columns are disabled on the handler.
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    # An omitted (or empty) selection means "download everything".
    to_download = models or list(_AvailableModels)

    # download_models returns the directory that is reported to the user below.
    output_dir = download_models(
        output_dir=output_dir,
        force=force,
        progress=(not quiet),
        with_layout=_AvailableModels.LAYOUT in to_download,
        with_tableformer=_AvailableModels.TABLEFORMER in to_download,
        with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
        with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
        with_easyocr=_AvailableModels.EASYOCR in to_download,
    )

    if quiet:
        # Quiet mode: emit just the path so the output stays script-friendly.
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")

        # NOTE(review): the last message ends with "at ." — the documentation
        # URL appears to be missing from the string; confirm and restore it.
        console.print(
            "\n",
            "Docling can now be configured for running offline using the local artifacts.\n\n",
            "Using the CLI:",
            f"`docling --artifacts-path={output_dir} FILE`",
            "\n",
            "Using Python: see the documentation at .",
        )


# Click-compatible command object built from the Typer app.
click_app = typer.main.get_command(app)

if __name__ == "__main__":
    app()