
* add the pytests Signed-off-by: Peter Staar <taa@zurich.ibm.com> * renamed the test folder and added the toplevel test Signed-off-by: Peter Staar <taa@zurich.ibm.com> * updated the toplevel function test Signed-off-by: Peter Staar <taa@zurich.ibm.com> * need to start running all tests successfully Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added the reference converted documents Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added first test for json and md output Signed-off-by: Peter Staar <taa@zurich.ibm.com> * ran pre-commit Signed-off-by: Peter Staar <taa@zurich.ibm.com> * replaced deprecated json function with model_dump_json Signed-off-by: Peter Staar <taa@zurich.ibm.com> * replaced deprecated json function with model_dump_json Signed-off-by: Peter Staar <taa@zurich.ibm.com> * reformatted code Signed-off-by: Peter Staar <taa@zurich.ibm.com> * Fix backend tests Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * commented out the drawing Signed-off-by: Peter Staar <taa@zurich.ibm.com> * ci: avoid duplicate runs Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> * commented out json verification for now Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added verification of input cells Signed-off-by: Peter Staar <taa@zurich.ibm.com> * reformat code Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added test to verify the cells in the pages Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added test to verify the cells in the pages (2) Signed-off-by: Peter Staar <taa@zurich.ibm.com> * added test to verify the cells in the pages (3) Signed-off-by: Peter Staar <taa@zurich.ibm.com> * run all examples in CI Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * make sure examples return failures Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * raise a failure if examples fail Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * fix examples Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * run examples after tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Add tests and update top_level_tests using only datamodels Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Remove unnecessary code Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Validate conversion status on e2e test Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * package verify utils and add more tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * reduce docs in example, since they are already in the tests Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * skip batch_convert Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * pin docling-parse 1.1.2 Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * updated the error messages Signed-off-by: Peter Staar <taa@zurich.ibm.com> * commented out the json verification for now Signed-off-by: Peter Staar <taa@zurich.ibm.com> * bumped GLM version Signed-off-by: Peter Staar <taa@zurich.ibm.com> * Fix lockfile Signed-off-by: Christoph Auer <cau@zurich.ibm.com> * Pin new docling-parse v1.1.3 Signed-off-by: Christoph Auer <cau@zurich.ibm.com> --------- Signed-off-by: Peter Staar <taa@zurich.ibm.com> Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
90 lines
2.6 KiB
TOML
90 lines
2.6 KiB
TOML
[tool.poetry]
|
|
name = "docling"
|
|
version = "1.8.4" # DO NOT EDIT, updated automatically
|
|
description = "Docling PDF conversion package"
|
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
|
license = "MIT"
|
|
readme = "README.md"
|
|
repository = "https://github.com/DS4SD/docling"
|
|
homepage = "https://github.com/DS4SD/docling"
|
|
keywords= ["docling", "convert", "document", "pdf", "layout model", "segmentation", "table structure", "table former"]
|
|
classifiers = [
|
|
"License :: OSI Approved :: MIT License",
|
|
"Operating System :: MacOS :: MacOS X",
|
|
"Operating System :: POSIX :: Linux",
|
|
"Development Status :: 5 - Production/Stable",
|
|
"Intended Audience :: Developers",
|
|
"Intended Audience :: Science/Research",
|
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
"Programming Language :: Python :: 3"
|
|
]
|
|
packages = [{include = "docling"}]
|
|
|
|
[tool.poetry.dependencies]
|
|
python = "^3.10"
|
|
pydantic = "^2.0.0"
|
|
docling-core = "^1.1.2"
|
|
docling-ibm-models = "^1.1.3"
|
|
deepsearch-glm = "^0.19.1"
|
|
filetype = "^1.2.0"
|
|
pypdfium2 = "^4.30.0"
|
|
pydantic-settings = "^2.3.0"
|
|
huggingface_hub = ">=0.23,<1"
|
|
requests = "^2.32.3"
|
|
easyocr = "^1.7"
|
|
docling-parse = "^1.1.3"
|
|
certifi = ">=2024.7.4"
|
|
rtree = "^1.3.0"
|
|
scipy = "^1.14.1"
|
|
|
|
[tool.poetry.group.dev.dependencies]
|
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
|
pytest = "^7.2.2"
|
|
pre-commit = "^3.7.1"
|
|
mypy = "^1.10.1"
|
|
isort = "^5.10.1"
|
|
python-semantic-release = "^7.32.2"
|
|
flake8 = "^6.0.0"
|
|
pyproject-flake8 = "^6.0.0"
|
|
pytest-xdist = "^3.3.1"
|
|
types-requests = "^2.31.0.2"
|
|
flake8-pyproject = "^1.2.3"
|
|
pylint = "^2.17.5"
|
|
|
|
[build-system]
|
|
requires = ["poetry-core"]
|
|
build-backend = "poetry.core.masonry.api"
|
|
|
|
[tool.black]
|
|
line-length = 88
|
|
target-version = ["py310"]
|
|
include = '\.pyi?$'
|
|
|
|
[tool.isort]
|
|
profile = "black"
|
|
line_length = 88
|
|
py_version=311
|
|
|
|
[tool.mypy]
|
|
pretty = true
|
|
# strict = true
|
|
no_implicit_optional = true
|
|
python_version = "3.10"
|
|
|
|
[tool.flake8]
|
|
max-line-length = 88
|
|
extend-ignore = ["E203", "E501"]
|
|
|
|
[tool.semantic_release]
|
|
# for default values check:
|
|
# https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg
|
|
|
|
version_source = "tag_only"
|
|
branch = "main"
|
|
|
|
# configure types which should trigger minor and patch version bumps respectively
|
|
# (note that they must be a subset of the configured allowed types):
|
|
parser_angular_allowed_types = "build,chore,ci,docs,feat,fix,perf,style,refactor,test"
|
|
parser_angular_minor_types = "feat"
|
|
parser_angular_patch_types = "fix,perf"
|