Docling/docling/cli/models.py
Michele Dolfi 5458a88464
ci: add coverage and ruff (#1383)
* add coverage calculation and push

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* runs 1 on linter issues

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-04-14 18:01:26 +02:00

131 lines
3.8 KiB
Python

import logging
import warnings
from enum import Enum
from pathlib import Path
from typing import Annotated, Optional
import typer
from rich.console import Console
from rich.logging import RichHandler
from docling.datamodel.settings import settings
from docling.utils.model_downloader import download_models
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
console = Console()
err_console = Console(stderr=True)
app = typer.Typer(
name="Docling models helper",
no_args_is_help=True,
add_completion=False,
pretty_exceptions_enable=False,
)
class _AvailableModels(str, Enum):
LAYOUT = "layout"
TABLEFORMER = "tableformer"
CODE_FORMULA = "code_formula"
PICTURE_CLASSIFIER = "picture_classifier"
SMOLVLM = "smolvlm"
GRANITE_VISION = "granite_vision"
EASYOCR = "easyocr"
_default_models = [
_AvailableModels.LAYOUT,
_AvailableModels.TABLEFORMER,
_AvailableModels.CODE_FORMULA,
_AvailableModels.PICTURE_CLASSIFIER,
_AvailableModels.EASYOCR,
]
@app.command("download")
def download(
output_dir: Annotated[
Path,
typer.Option(
...,
"-o",
"--output-dir",
help="The directory where to download the models.",
),
] = (settings.cache_dir / "models"),
force: Annotated[
bool, typer.Option(..., help="If true, the download will be forced.")
] = False,
models: Annotated[
Optional[list[_AvailableModels]],
typer.Argument(
help="Models to download (default behavior: a predefined set of models will be downloaded).",
),
] = None,
all: Annotated[
bool,
typer.Option(
...,
"--all",
help="If true, all available models will be downloaded (mutually exclusive with passing specific models).",
show_default=True,
),
] = False,
quiet: Annotated[
bool,
typer.Option(
...,
"-q",
"--quiet",
help="No extra output is generated, the CLI prints only the directory with the cached models.",
),
] = False,
):
if models and all:
raise typer.BadParameter(
"Cannot simultaneously set 'all' parameter and specify models to download."
)
if not quiet:
logging.basicConfig(
level=logging.INFO,
format="[blue]%(message)s[/blue]",
datefmt="[%X]",
handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
)
to_download = models or (list(_AvailableModels) if all else _default_models)
output_dir = download_models(
output_dir=output_dir,
force=force,
progress=(not quiet),
with_layout=_AvailableModels.LAYOUT in to_download,
with_tableformer=_AvailableModels.TABLEFORMER in to_download,
with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
with_smolvlm=_AvailableModels.SMOLVLM in to_download,
with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
with_easyocr=_AvailableModels.EASYOCR in to_download,
)
if quiet:
typer.echo(output_dir)
else:
typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")
console.print(
"\n",
"Docling can now be configured for running offline using the local artifacts.\n\n",
"Using the CLI:",
f"`docling --artifacts-path={output_dir} FILE`",
"\n",
"Using Python: see the documentation at <https://docling-project.github.io/docling/usage>.",
)
click_app = typer.main.get_command(app)
if __name__ == "__main__":
app()