docs: add automatic generation of CLI reference (#325)

* docs: add automatic generation of CLI reference

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* install deps for building CLI ref

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-11-15 13:18:17 +01:00 committed by GitHub
parent 25fd149c38
commit ca8524ecae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 48 additions and 68 deletions

View File

@ -11,15 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install poetry
run: pipx install poetry==1.8.3
shell: bash
- uses: actions/setup-python@v5
with:
cache: 'poetry'
- name: Install dependencies
run: poetry install --only docs
shell: bash
- uses: ./.github/actions/setup-poetry
- name: Build docs
run: poetry run mkdocs build --verbose --clean
- name: Build and push docs

View File

@ -301,5 +301,7 @@ def convert(
_log.info(f"All documents were converted in {end_time:.2f} seconds.")
click_app = typer.main.get_command(app)
if __name__ == "__main__":
app()

9
docs/cli.md Normal file
View File

@ -0,0 +1,9 @@
# CLI Reference
This page provides documentation for our command line tools.
::: mkdocs-click
:module: docling.cli.main
:command: click_app
:prog_name: docling
:style: table

View File

@ -22,54 +22,7 @@ A simple example would look like this:
docling https://arxiv.org/pdf/2206.01062
```
To see all available options (export formats etc.) run `docling --help`.
<details>
<summary><b>CLI reference</b></summary>
Here are the available options as of this writing (for an up-to-date listing, run `docling --help`):
```console
$ docling --help
Usage: docling [OPTIONS] source
╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * input_sources source PDF files to convert. Can be local file / directory paths or URL. [default: None] │
│ [required] │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ --from [docx|pptx|html|image|pdf|asciidoc|md] Specify input formats to convert from. │
│ Defaults to all formats. │
│ [default: None] │
│ --to [md|json|text|doctags] Specify output formats. Defaults to │
│ Markdown. │
│ [default: None] │
│ --ocr --no-ocr If enabled, the bitmap content will be │
│ processed using OCR. │
│ [default: ocr] │
│ --ocr-engine [easyocr|tesseract_cli|tesseract] The OCR engine to use. │
│ [default: easyocr] │
│ --pdf-backend [pypdfium2|dlparse_v1|dlparse_v2] The PDF backend to use. │
│ [default: dlparse_v1] │
│ --table-mode [fast|accurate] The mode to use in the table structure │
│ model. │
│ [default: fast] │
│ --artifacts-path PATH If provided, the location of the model │
│ artifacts. │
│ [default: None] │
│ --abort-on-error --no-abort-on-error If enabled, the bitmap content will be │
│ processed using OCR. │
│ [default: no-abort-on-error] │
│ --output PATH Output directory where results are │
│ saved. │
│ [default: .] │
│ --version Show version information. │
│ --help Show this message and exit. │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
```
</details>
To see all available options (export formats etc.) run `docling --help`. More details are available in the [CLI reference page](./cli.md).

View File

@ -55,6 +55,7 @@ nav:
- Home: index.md
- Installation: installation.md
- Usage: usage.md
- CLI: cli.md
- Docling v2: v2.md
- Concepts:
- Concepts: concepts/index.md
@ -99,9 +100,16 @@ markdown_extensions:
- admonition
- pymdownx.details
- attr_list
- mkdocs-click
plugins:
- search
- mkdocs-jupyter
# - mkdocstrings:
# default_handler: python
# options:
# preload_modules:
# - docling
# - docling_core
extra_css:
- stylesheets/extra.css

37
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@ -196,8 +196,8 @@ files = [
lazy-object-proxy = ">=1.4.0"
typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
wrapt = [
{version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
{version = ">=1.11,<2", markers = "python_version < \"3.11\""},
{version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
]
[[package]]
@ -839,8 +839,8 @@ files = [
docling-core = ">=2.0,<3.0"
docutils = "!=0.21"
numpy = [
{version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\" and python_version < \"3.13\""},
{version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
]
pandas = {version = ">=2.1.4,<3.0.0", markers = "python_version >= \"3.9\""}
python-dotenv = ">=1.0.0,<2.0.0"
@ -927,8 +927,8 @@ jsonlines = ">=3.1.0,<4.0.0"
lxml = ">=4.9.1,<5.0.0"
mean_average_precision = ">=2021.4.26.0,<2022.0.0.0"
numpy = [
{version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.24.4,<2.0.0", markers = "python_version < \"3.13\""},
{version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
]
opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
Pillow = ">=10.0.0,<11.0.0"
@ -2067,8 +2067,8 @@ jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.1.112,<0.2.0"
packaging = ">=23.2,<25"
pydantic = [
{version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
{version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
PyYAML = ">=5.3"
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
@ -2136,8 +2136,8 @@ files = [
httpx = ">=0.23.0,<1"
orjson = ">=3.9.14,<4.0.0"
pydantic = [
{version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
{version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
requests = ">=2,<3"
requests-toolbelt = ">=1.0.0,<2.0.0"
@ -2597,6 +2597,21 @@ watchdog = ">=2.0"
i18n = ["babel (>=2.9.0)"]
min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"]
[[package]]
name = "mkdocs-click"
version = "0.8.1"
description = "An MkDocs extension to generate documentation for Click command line applications"
optional = false
python-versions = ">=3.7"
files = [
{file = "mkdocs_click-0.8.1-py3-none-any.whl", hash = "sha256:a100ff938be63911f86465a1c21d29a669a7c51932b700fdb3daa90d13b61ee4"},
{file = "mkdocs_click-0.8.1.tar.gz", hash = "sha256:0a88cce04870c5d70ff63138e2418219c3c4119cc928a59c66b76eb5214edba6"},
]
[package.dependencies]
click = ">=8.1"
markdown = ">=3.3"
[[package]]
name = "mkdocs-get-deps"
version = "0.2.0"
@ -3507,10 +3522,10 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
[[package]]
@ -3659,9 +3674,9 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -4258,8 +4273,8 @@ files = [
annotated-types = ">=0.6.0"
pydantic-core = "2.23.4"
typing-extensions = [
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
{version = ">=4.12.2", markers = "python_version >= \"3.13\""},
]
[package.extras]
@ -4427,8 +4442,8 @@ files = [
astroid = ">=2.15.8,<=2.17.0-dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.2", markers = "python_version < \"3.11\""},
{version = ">=0.3.6", markers = "python_version >= \"3.11\""},
]
isort = ">=4.2.5,<6"
mccabe = ">=0.6,<0.8"
@ -7164,4 +7179,4 @@ tesserocr = ["tesserocr"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "3d187fb42d7001455eecc58d73b141fbe3d3972d963b2b0997dee282218d626e"
content-hash = "c7a2f4e30564c5bcd7ed96f203028f781a05ff2103698091616c8aff34ab3493"

View File

@ -69,6 +69,7 @@ nbqa = "^1.9.0"
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.40"
mkdocs-jupyter = "^0.25.0"
mkdocs-click = "^0.8.1"
[tool.poetry.group.examples.dependencies]
datasets = "^2.21.0"