# Package metadata consumed by Poetry when building and publishing.
[tool.poetry]
name = "docling"
version = "2.26.0"  # DO NOT EDIT, updated automatically
description = "SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications."
# NOTE(review): every entry ends in a trailing space and has no "<email>"
# part; the addresses look like they were stripped somewhere upstream.
# Confirm against the published metadata before releasing.
authors = [
    "Christoph Auer ",
    "Michele Dolfi ",
    "Maxim Lysak ",
    "Nikos Livathinos ",
    "Ahmed Nassar ",
    "Panos Vagenas ",
    "Peter Staar ",
]
license = "MIT"
readme = "README.md"
repository = "https://github.com/docling-project/docling"
homepage = "https://github.com/docling-project/docling"
keywords = [
    "docling",
    "convert",
    "document",
    "pdf",
    "docx",
    "html",
    "markdown",
    "layout model",
    "segmentation",
    "table structure",
    "table former",
]
classifiers = [
    "License :: OSI Approved :: MIT License",
    "Operating System :: MacOS :: MacOS X",
    "Operating System :: POSIX :: Linux",
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Programming Language :: Python :: 3",
]
packages = [{ include = "docling" }]

# Runtime dependencies. Entries marked `optional = true` are only installed
# when requested through an extra listed in [tool.poetry.extras].
[tool.poetry.dependencies]
python = "^3.9"
pydantic = "^2.0.0"
docling-core = { extras = ["chunking"], version = "^2.23.0" }
docling-ibm-models = "^3.4.0"
docling-parse = "^4.0.0"
filetype = "^1.2.0"
pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0"
huggingface_hub = ">=0.23,<1"
requests = "^2.32.2"
easyocr = "^1.7"
tesserocr = { version = "^2.7.1", optional = true }
certifi = ">=2024.7.4"
rtree = "^1.3.0"
# Python 3.9 gets a capped range (<1.14.0); newer interpreters are uncapped
# within the 1.x series.
scipy = [
    { version = "^1.6.0", markers = "python_version >= '3.10'" },
    { version = ">=1.6.0,<1.14.0", markers = "python_version < '3.10'" },
]
typer = "^0.12.5"
python-docx = "^1.1.2"
python-pptx = "^1.0.2"
beautifulsoup4 = "^4.12.3"
pandas = "^2.1.4"
marko = "^2.1.2"
openpyxl = "^3.1.5"
lxml = ">=4.0.0,<6.0.0"
ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
# Inline tables must stay on a single line (TOML 1.0).
rapidocr-onnxruntime = { version = "^1.4.0", optional = true, markers = "python_version < '3.13'" }
onnxruntime = [
    # 1.19.2 is the last version with python3.9 support,
    # see https://github.com/microsoft/onnxruntime/releases/tag/v1.20.0
    { version = ">=1.7.0,<1.20.0", optional = true, markers = "python_version < '3.10'" },
    { version = "^1.7.0", optional = true, markers = "python_version >= '3.10'" },
]
# Intel macOS (darwin + x86_64) is pinned to the 4.42 series; every other
# platform follows ^4.46.0.
transformers = [
    { markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^4.46.0", optional = true },
    { markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~4.42.0", optional = true },
]
# No entry for Intel macOS: accelerate is only resolved on other platforms.
accelerate = [
    { markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^1.2.1", optional = true },
]
pillow = ">=10.0.0,<12.0.0"
tqdm = "^4.65.0"
pluggy = "^1.0.0"
pylatexenc = "^2.10"

# Tooling for linting, type checking, testing and releasing.
[tool.poetry.group.dev.dependencies]
black = { extras = ["jupyter"], version = "^24.4.2" }
pytest = "^7.2.2"
pre-commit = "^3.7.1"
mypy = "^1.10.1"
isort = "^5.10.1"
python-semantic-release = "^7.32.2"
flake8 = "^6.0.0"
pyproject-flake8 = "^6.0.0"
pytest-xdist = "^3.3.1"
types-requests = "^2.31.0.2"
flake8-pyproject = "^1.2.3"
pylint = "^2.17.5"
pandas-stubs = "^2.1.4.231227"
ipykernel = "^6.29.5"
ipywidgets = "^8.1.5"
nbqa = "^1.9.0"
types-openpyxl = "^3.1.5.20241114"
types-tqdm = "^4.67.0.20241221"

# Documentation site build.
[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.40"
mkdocs-jupyter = "^0.25.0"
mkdocs-click = "^0.8.1"
mkdocstrings = { extras = ["python"], version = "^0.27.0" }
griffe-pydantic = "^1.1.0"

# Packages needed only by the example notebooks/scripts.
[tool.poetry.group.examples.dependencies]
datasets = "^2.21.0"
python-dotenv = "^1.0.1"
langchain-huggingface = "^0.0.3"
langchain-milvus = "^0.1.4"
langchain-text-splitters = "^0.2.4"

# Opt-in group that only narrows the numpy range per Python version.
[tool.poetry.group.constraints]
optional = true

[tool.poetry.group.constraints.dependencies]
numpy = [
    { version = ">=1.24.4,<3.0.0", markers = 'python_version >= "3.10"' },
    { version = ">=1.24.4,<2.1.0", markers = 'python_version < "3.10"' },
]
# Opt-in group carrying the torch/torchvision ranges. Intel macOS
# (darwin + x86_64) is held to torch ~2.2.2 / torchvision ~0.17.2; all other
# platforms use the open ranges.
[tool.poetry.group.mac_intel]
optional = true

[tool.poetry.group.mac_intel.dependencies]
torch = [
    { markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^2.2.2" },
    { markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~2.2.2" },
]
torchvision = [
    { markers = "sys_platform != 'darwin' or platform_machine != 'x86_64'", version = "^0" },
    { markers = "sys_platform == 'darwin' and platform_machine == 'x86_64'", version = "~0.17.2" },
]

# Extras map a user-facing name to optional entries of
# [tool.poetry.dependencies].
[tool.poetry.extras]
tesserocr = ["tesserocr"]
ocrmac = ["ocrmac"]
vlm = ["transformers", "accelerate"]
rapidocr = ["rapidocr-onnxruntime", "onnxruntime"]

# Console entry points installed with the package.
[tool.poetry.scripts]
docling = "docling.cli.main:app"
docling-tools = "docling.cli.tools:app"

# Entry-point group "docling" registering the built-in plugin module.
[tool.poetry.plugins.docling]
docling_defaults = "docling.models.plugins.defaults"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.black]
line-length = 88
target-version = ["py39"]
include = '\.pyi?$'

[tool.isort]
profile = "black"
line_length = 88
py_version = 39

[tool.mypy]
pretty = true
# strict = true
no_implicit_optional = true
plugins = "pydantic.mypy"
# NOTE(review): black and isort target py39 and the package declares
# python = "^3.9", but mypy checks against 3.10 - confirm this is intended.
python_version = "3.10"

# Third-party packages for which missing type information is tolerated.
[[tool.mypy.overrides]]
module = [
    "docling_parse.*",
    "pypdfium2.*",
    "networkx.*",
    "scipy.*",
    "filetype.*",
    "tesserocr.*",
    "docling_ibm_models.*",
    "easyocr.*",
    "ocrmac.*",
    "lxml.*",
    "huggingface_hub.*",
    "transformers.*",
    "pylatexenc.*",
]
ignore_missing_imports = true

[tool.flake8]
max-line-length = 88
extend-ignore = ["E203", "E501"]

[tool.semantic_release]
# for default values check:
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
version_source = "tag_only"
branch = "main"

# configure types which should trigger minor and patch version bumps respectively
# (note that they must be a subset of the configured allowed types):
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
parser_angular_minor_types = "feat"
parser_angular_patch_types = "fix,perf"