chore: switch to docling-core Markdown export (#14)

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-07-18 16:10:05 +02:00 committed by GitHub
parent 28d1c746a6
commit eb0b208272
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 7 additions and 10 deletions

View File

@ -3,7 +3,6 @@ from io import BytesIO
from pathlib import Path, PurePath from pathlib import Path, PurePath
from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union
from deepsearch.documents.core.export import export_to_markdown
from docling_core.types import BaseCell, BaseText from docling_core.types import BaseCell, BaseText
from docling_core.types import BoundingBox as DsBoundingBox from docling_core.types import BoundingBox as DsBoundingBox
from docling_core.types import Document as DsDocument from docling_core.types import Document as DsDocument
@ -299,9 +298,7 @@ class ConvertedDocument(BaseModel):
def render_as_markdown(self): def render_as_markdown(self):
if self.output: if self.output:
return export_to_markdown( return self.output.export_to_markdown()
self.output.model_dump(by_alias=True, exclude_none=True)
)
else: else:
return "" return ""

9
poetry.lock generated
View File

@ -707,13 +707,13 @@ files = [
[[package]] [[package]]
name = "docling-core" name = "docling-core"
version = "0.2.0" version = "1.1.0"
description = "A python library to define and validate data types in Docling." description = "A python library to define and validate data types in Docling."
optional = false optional = false
python-versions = "<4.0,>=3.9" python-versions = "<4.0,>=3.9"
files = [ files = [
{file = "docling_core-0.2.0-py3-none-any.whl", hash = "sha256:e8ff3af4f13a3a1709e2d0be8f1a258bfa71a820a70acab1a3b41f9c10e428b5"}, {file = "docling_core-1.1.0-py3-none-any.whl", hash = "sha256:80096ec6bbce9e616700ccd6bdd5a50e5d1a9a832d7968da3874d54b29962536"},
{file = "docling_core-0.2.0.tar.gz", hash = "sha256:3fa8920d12ce5cf687fc0c43b103a6c3a54a53f9eecdde9fad5dc5f0a0c76d6d"}, {file = "docling_core-1.1.0.tar.gz", hash = "sha256:69bc83d3b192d9e56bb91d77d8434d9fc109f8cb25ab5a285d2f3bccc10899cb"},
] ]
[package.dependencies] [package.dependencies]
@ -723,6 +723,7 @@ jsonschema = ">=4.16.0,<5.0.0"
poetry = ">=1.8.3,<2.0.0" poetry = ">=1.8.3,<2.0.0"
pydantic = ">=2.6.0,<3.0.0" pydantic = ">=2.6.0,<3.0.0"
pyproject-toml = ">=0.0.10,<0.0.11" pyproject-toml = ">=0.0.10,<0.0.11"
tabulate = ">=0.9.0,<0.10.0"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@ -4827,4 +4828,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "dc19329559f190dfe687b4ee272eb6dac66b3d9fe0398c95c2572e8c63fa23ac" content-hash = "a2de0e26ae072cae8ca01360b4242695427ae2493e860a0bc9e8c11d2d4cf58e"

View File

@ -23,10 +23,9 @@ packages = [{include = "docling"}]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.11" python = "^3.11"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = "^0.2.0" docling-core = "^1.1.0"
docling-ibm-models = "^0.2.0" docling-ibm-models = "^0.2.0"
deepsearch-glm = ">=0.19.0,<1" deepsearch-glm = ">=0.19.0,<1"
deepsearch-toolkit = ">=0.47.0,<1"
filetype = "^1.2.0" filetype = "^1.2.0"
pypdfium2 = "^4.30.0" pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0" pydantic-settings = "^2.3.0"