
* add coverage calculation and push Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * new codecov version and usage of token Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * enable ruff formatter instead of black and isort Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff lint fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff unsafe fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add removed imports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * runs 1 on linter issues Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * finalize linter fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update pyproject.toml Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
131 lines
3.8 KiB
Python
131 lines
3.8 KiB
Python
import logging
|
|
import warnings
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Annotated, Optional
|
|
|
|
import typer
|
|
from rich.console import Console
|
|
from rich.logging import RichHandler
|
|
|
|
from docling.datamodel.settings import settings
|
|
from docling.utils.model_downloader import download_models
|
|
|
|
# Install "ignore" filters at import time for two families of warnings:
#   * UserWarning raised from modules whose name matches the regex
#     "pydantic|torch";
#   * FutureWarning raised from modules whose name matches "easyocr".
# NOTE(review): presumably done to keep the CLI output free of third-party
# noise - confirm before narrowing or removing these filters.
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
|
|
|
# Rich console with default settings; the `download` command prints its
# closing usage hints through it.
console = Console()
# Rich console bound to standard error (stderr=True). Not referenced in the
# visible part of this module.
err_console = Console(stderr=True)
|
|
|
|
|
|
# Typer application that groups the model-management commands of this module.
app = typer.Typer(
    name="Docling models helper",
    # Show the help screen instead of an error when invoked without arguments.
    no_args_is_help=True,
    # Do not add the shell-completion installation options to the CLI.
    add_completion=False,
    # Keep standard Python tracebacks instead of Typer's rich-formatted ones.
    pretty_exceptions_enable=False,
)
|
|
|
|
|
|
class _AvailableModels(str, Enum):
    """Identifiers of the models that the ``download`` command can fetch.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (for example ``_AvailableModels.LAYOUT == "layout"``).
    The string values are what users type on the command line.
    """

    LAYOUT = "layout"
    TABLEFORMER = "tableformer"
    CODE_FORMULA = "code_formula"
    PICTURE_CLASSIFIER = "picture_classifier"
    SMOLVLM = "smolvlm"
    GRANITE_VISION = "granite_vision"
    EASYOCR = "easyocr"
|
|
|
|
|
|
# Models downloaded when the user neither lists specific models nor passes
# --all. SMOLVLM and GRANITE_VISION are not part of this set; they are only
# fetched when requested explicitly or via --all.
_default_models = [
    _AvailableModels.LAYOUT,
    _AvailableModels.TABLEFORMER,
    _AvailableModels.CODE_FORMULA,
    _AvailableModels.PICTURE_CLASSIFIER,
    _AvailableModels.EASYOCR,
]
|
|
|
|
|
|
@app.command("download")
def download(
    output_dir: Annotated[
        Path,
        typer.Option(
            ...,
            "-o",
            "--output-dir",
            help="The directory where to download the models.",
        ),
    ] = (settings.cache_dir / "models"),
    force: Annotated[
        bool, typer.Option(..., help="If true, the download will be forced.")
    ] = False,
    models: Annotated[
        Optional[list[_AvailableModels]],
        typer.Argument(
            help="Models to download (default behavior: a predefined set of models will be downloaded).",
        ),
    ] = None,
    all: Annotated[
        bool,
        typer.Option(
            ...,
            "--all",
            help="If true, all available models will be downloaded (mutually exclusive with passing specific models).",
            show_default=True,
        ),
    ] = False,
    quiet: Annotated[
        bool,
        typer.Option(
            ...,
            "-q",
            "--quiet",
            help="No extra output is generated, the CLI prints only the directory with the cached models.",
        ),
    ] = False,
) -> None:
    # Download the selected model artifacts into `output_dir` and report where
    # they were stored.
    #
    # Selection rules: explicitly listed `models` win; otherwise `--all`
    # selects every member of `_AvailableModels`; otherwise `_default_models`
    # is used. Passing both `models` and `--all` raises `typer.BadParameter`.
    #
    # NOTE: this is documented with comments rather than a docstring on
    # purpose. Typer uses a command function's docstring as its --help text,
    # so adding one would change the CLI output.
    #
    # NOTE: the parameter `all` shadows the builtin of the same name inside
    # this function; the name is kept because it is part of the signature.
    if models and all:
        raise typer.BadParameter(
            "Cannot simultaneously set 'all' parameter and specify models to download."
        )

    if not quiet:
        # Route INFO-level log records through Rich so that progress messages
        # are rendered (in blue) on the terminal.
        logging.basicConfig(
            level=logging.INFO,
            format="[blue]%(message)s[/blue]",
            datefmt="[%X]",
            handlers=[RichHandler(show_level=False, show_time=False, markup=True)],
        )

    to_download = models or (list(_AvailableModels) if all else _default_models)

    # `download_models` returns the directory that was used; from here on
    # `output_dir` refers to that returned value.
    output_dir = download_models(
        output_dir=output_dir,
        force=force,
        progress=(not quiet),
        with_layout=_AvailableModels.LAYOUT in to_download,
        with_tableformer=_AvailableModels.TABLEFORMER in to_download,
        with_code_formula=_AvailableModels.CODE_FORMULA in to_download,
        with_picture_classifier=_AvailableModels.PICTURE_CLASSIFIER in to_download,
        with_smolvlm=_AvailableModels.SMOLVLM in to_download,
        with_granite_vision=_AvailableModels.GRANITE_VISION in to_download,
        with_easyocr=_AvailableModels.EASYOCR in to_download,
    )

    if quiet:
        # Quiet mode: emit only the directory, so the output stays script-friendly.
        typer.echo(output_dir)
    else:
        typer.secho(f"\nModels downloaded into: {output_dir}.", fg="green")

        console.print(
            "\n",
            "Docling can now be configured for running offline using the local artifacts.\n\n",
            "Using the CLI:",
            f"`docling --artifacts-path={output_dir} FILE`",
            "\n",
            "Using Python: see the documentation at <https://docling-project.github.io/docling/usage>.",
        )
|
|
|
|
|
|
# Click command object derived from the Typer app.
# NOTE(review): presumably exposed for tooling that expects a plain Click
# command - confirm which consumer relies on it.
click_app = typer.main.get_command(app)

# Allow running this module directly as a script.
if __name__ == "__main__":
    app()
|