fix: Fixes for wordx (#432)
* fixes for referencing drawing blip in wordx Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Added safety try-except when trying to load pillow image from a docx blob. Added explicit dependency on lxml. Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Added test for word file with embedded emf images, re-generated full tests for docx, eased up dependency on lxml Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> * Updated lxml dependency version Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> --------- Signed-off-by: Maksym Lysak <mly@zurich.ibm.com> Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
d7072b4b56
commit
d0a1180478
@ -14,7 +14,8 @@ from docling_core.types.doc import (
|
|||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from PIL import Image
|
from lxml.etree import XPath
|
||||||
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
@ -132,8 +133,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
|
def walk_linear(self, body, docx_obj, doc) -> DoclingDocument:
|
||||||
for element in body:
|
for element in body:
|
||||||
tag_name = etree.QName(element).localname
|
tag_name = etree.QName(element).localname
|
||||||
|
|
||||||
# Check for Inline Images (blip elements)
|
# Check for Inline Images (blip elements)
|
||||||
drawing_blip = element.xpath(".//a:blip")
|
namespaces = {
|
||||||
|
"a": "http://schemas.openxmlformats.org/drawingml/2006/main",
|
||||||
|
"r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
|
||||||
|
}
|
||||||
|
xpath_expr = XPath(".//a:blip", namespaces=namespaces)
|
||||||
|
drawing_blip = xpath_expr(element)
|
||||||
|
|
||||||
# Check for Tables
|
# Check for Tables
|
||||||
if element.tag.endswith("tbl"):
|
if element.tag.endswith("tbl"):
|
||||||
@ -210,7 +217,6 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
|
paragraph = docx.text.paragraph.Paragraph(element, docx_obj)
|
||||||
|
|
||||||
if paragraph.text is None:
|
if paragraph.text is None:
|
||||||
# _log.warn(f"paragraph has text==None")
|
|
||||||
return
|
return
|
||||||
text = paragraph.text.strip()
|
text = paragraph.text.strip()
|
||||||
# if len(text)==0 # keep empty paragraphs, they seperate adjacent lists!
|
# if len(text)==0 # keep empty paragraphs, they seperate adjacent lists!
|
||||||
@ -502,10 +508,17 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
image_data = get_docx_image(element, drawing_blip)
|
image_data = get_docx_image(element, drawing_blip)
|
||||||
image_bytes = BytesIO(image_data)
|
image_bytes = BytesIO(image_data)
|
||||||
# Open the BytesIO object with PIL to create an Image
|
# Open the BytesIO object with PIL to create an Image
|
||||||
pil_image = Image.open(image_bytes)
|
try:
|
||||||
doc.add_picture(
|
pil_image = Image.open(image_bytes)
|
||||||
parent=self.parents[self.level],
|
doc.add_picture(
|
||||||
image=ImageRef.from_pil(image=pil_image, dpi=72),
|
parent=self.parents[self.level],
|
||||||
caption=None,
|
image=ImageRef.from_pil(image=pil_image, dpi=72),
|
||||||
)
|
caption=None,
|
||||||
|
)
|
||||||
|
except (UnidentifiedImageError, OSError) as e:
|
||||||
|
_log.warning("Warning: image cannot be loaded by Pillow")
|
||||||
|
doc.add_picture(
|
||||||
|
parent=self.parents[self.level],
|
||||||
|
caption=None,
|
||||||
|
)
|
||||||
return
|
return
|
||||||
|
245
poetry.lock
generated
245
poetry.lock
generated
@ -2212,157 +2212,111 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lxml"
|
name = "lxml"
|
||||||
version = "5.3.0"
|
version = "4.9.4"
|
||||||
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*"
|
||||||
files = [
|
files = [
|
||||||
{file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"},
|
{file = "lxml-4.9.4-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e214025e23db238805a600f1f37bf9f9a15413c7bf5f9d6ae194f84980c78722"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"},
|
{file = "lxml-4.9.4-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ec53a09aee61d45e7dbe7e91252ff0491b6b5fee3d85b2d45b173d8ab453efc1"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501d0d7e26b4d261fca8132854d845e4988097611ba2531408ec91cf3fd9d20a"},
|
{file = "lxml-4.9.4-cp27-cp27m-win32.whl", hash = "sha256:7d1d6c9e74c70ddf524e3c09d9dc0522aba9370708c2cb58680ea40174800013"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8"},
|
{file = "lxml-4.9.4-cp27-cp27m-win_amd64.whl", hash = "sha256:cb53669442895763e61df5c995f0e8361b61662f26c1b04ee82899c2789c8f69"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e41506fec7a7f9405b14aa2d5c8abbb4dbbd09d88f9496958b6d00cb4d45330"},
|
{file = "lxml-4.9.4-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:647bfe88b1997d7ae8d45dabc7c868d8cb0c8412a6e730a7651050b8c7289cf2"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7d4a670107d75dfe5ad080bed6c341d18c4442f9378c9f58e5851e86eb79965"},
|
{file = "lxml-4.9.4-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:4d973729ce04784906a19108054e1fd476bc85279a403ea1a72fdb051c76fa48"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ce1f1e2c7755abfc7e759dc34d7d05fd221723ff822947132dc934d122fe22"},
|
{file = "lxml-4.9.4-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:056a17eaaf3da87a05523472ae84246f87ac2f29a53306466c22e60282e54ff8"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:44264ecae91b30e5633013fb66f6ddd05c006d3e0e884f75ce0b4755b3e3847b"},
|
{file = "lxml-4.9.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:aaa5c173a26960fe67daa69aa93d6d6a1cd714a6eb13802d4e4bd1d24a530644"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:3c174dc350d3ec52deb77f2faf05c439331d6ed5e702fc247ccb4e6b62d884b7"},
|
{file = "lxml-4.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:647459b23594f370c1c01768edaa0ba0959afc39caeeb793b43158bb9bb6a663"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:2dfab5fa6a28a0b60a20638dc48e6343c02ea9933e3279ccb132f555a62323d8"},
|
{file = "lxml-4.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:bdd9abccd0927673cffe601d2c6cdad1c9321bf3437a2f507d6b037ef91ea307"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b1c8c20847b9f34e98080da785bb2336ea982e7f913eed5809e5a3c872900f32"},
|
{file = "lxml-4.9.4-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:00e91573183ad273e242db5585b52670eddf92bacad095ce25c1e682da14ed91"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2c86bf781b12ba417f64f3422cfc302523ac9cd1d8ae8c0f92a1c66e56ef2e86"},
|
{file = "lxml-4.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a602ed9bd2c7d85bd58592c28e101bd9ff9c718fbde06545a70945ffd5d11868"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c162b216070f280fa7da844531169be0baf9ccb17263cf5a8bf876fcd3117fa5"},
|
{file = "lxml-4.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:de362ac8bc962408ad8fae28f3967ce1a262b5d63ab8cefb42662566737f1dc7"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:36aef61a1678cb778097b4a6eeae96a69875d51d1e8f4d4b491ab3cfb54b5a03"},
|
{file = "lxml-4.9.4-cp310-cp310-win32.whl", hash = "sha256:33714fcf5af4ff7e70a49731a7cc8fd9ce910b9ac194f66eaa18c3cc0a4c02be"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f65e5120863c2b266dbcc927b306c5b78e502c71edf3295dfcb9501ec96e5fc7"},
|
{file = "lxml-4.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:d3caa09e613ece43ac292fbed513a4bce170681a447d25ffcbc1b647d45a39c5"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-win32.whl", hash = "sha256:ef0c1fe22171dd7c7c27147f2e9c3e86f8bdf473fed75f16b0c2e84a5030ce80"},
|
{file = "lxml-4.9.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:359a8b09d712df27849e0bcb62c6a3404e780b274b0b7e4c39a88826d1926c28"},
|
||||||
{file = "lxml-5.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:052d99051e77a4f3e8482c65014cf6372e61b0a6f4fe9edb98503bb5364cfee3"},
|
{file = "lxml-4.9.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:43498ea734ccdfb92e1886dfedaebeb81178a241d39a79d5351ba2b671bff2b2"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74bcb423462233bc5d6066e4e98b0264e7c1bed7541fff2f4e34fe6b21563c8b"},
|
{file = "lxml-4.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4855161013dfb2b762e02b3f4d4a21cc7c6aec13c69e3bffbf5022b3e708dd97"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a3d819eb6f9b8677f57f9664265d0a10dd6551d227afb4af2b9cd7bdc2ccbf18"},
|
{file = "lxml-4.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:c71b5b860c5215fdbaa56f715bc218e45a98477f816b46cfde4a84d25b13274e"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8f5db71b28b8c404956ddf79575ea77aa8b1538e8b2ef9ec877945b3f46442"},
|
{file = "lxml-4.9.4-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9a2b5915c333e4364367140443b59f09feae42184459b913f0f41b9fed55794a"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3406b63232fc7e9b8783ab0b765d7c59e7c59ff96759d8ef9632fca27c7ee4"},
|
{file = "lxml-4.9.4-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d82411dbf4d3127b6cde7da0f9373e37ad3a43e89ef374965465928f01c2b979"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ecdd78ab768f844c7a1d4a03595038c166b609f6395e25af9b0f3f26ae1230f"},
|
{file = "lxml-4.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:273473d34462ae6e97c0f4e517bd1bf9588aa67a1d47d93f760a1282640e24ac"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168f2dfcfdedf611eb285efac1516c8454c8c99caf271dccda8943576b67552e"},
|
{file = "lxml-4.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:389d2b2e543b27962990ab529ac6720c3dded588cc6d0f6557eec153305a3622"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa617107a410245b8660028a7483b68e7914304a6d4882b5ff3d2d3eb5948d8c"},
|
{file = "lxml-4.9.4-cp311-cp311-win32.whl", hash = "sha256:8aecb5a7f6f7f8fe9cac0bcadd39efaca8bbf8d1bf242e9f175cbe4c925116c3"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:69959bd3167b993e6e710b99051265654133a98f20cec1d9b493b931942e9c16"},
|
{file = "lxml-4.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:c7721a3ef41591341388bb2265395ce522aba52f969d33dacd822da8f018aff8"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:bd96517ef76c8654446fc3db9242d019a1bb5fe8b751ba414765d59f99210b79"},
|
{file = "lxml-4.9.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:dbcb2dc07308453db428a95a4d03259bd8caea97d7f0776842299f2d00c72fc8"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ab6dd83b970dc97c2d10bc71aa925b84788c7c05de30241b9e96f9b6d9ea3080"},
|
{file = "lxml-4.9.4-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:01bf1df1db327e748dcb152d17389cf6d0a8c5d533ef9bab781e9d5037619229"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eec1bb8cdbba2925bedc887bc0609a80e599c75b12d87ae42ac23fd199445654"},
|
{file = "lxml-4.9.4-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e8f9f93a23634cfafbad6e46ad7d09e0f4a25a2400e4a64b1b7b7c0fbaa06d9d"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a7095eeec6f89111d03dabfe5883a1fd54da319c94e0fb104ee8f23616b572d"},
|
{file = "lxml-4.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3f3f00a9061605725df1816f5713d10cd94636347ed651abdbc75828df302b20"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6f651ebd0b21ec65dfca93aa629610a0dbc13dbc13554f19b0113da2e61a4763"},
|
{file = "lxml-4.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:953dd5481bd6252bd480d6ec431f61d7d87fdcbbb71b0d2bdcfc6ae00bb6fb10"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f422a209d2455c56849442ae42f25dbaaba1c6c3f501d58761c619c7836642ec"},
|
{file = "lxml-4.9.4-cp312-cp312-win32.whl", hash = "sha256:266f655d1baff9c47b52f529b5f6bec33f66042f65f7c56adde3fcf2ed62ae8b"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62f7fdb0d1ed2065451f086519865b4c90aa19aed51081979ecd05a21eb4d1be"},
|
{file = "lxml-4.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:f1faee2a831fe249e1bae9cbc68d3cd8a30f7e37851deee4d7962b17c410dd56"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-win32.whl", hash = "sha256:c6379f35350b655fd817cd0d6cbeef7f265f3ae5fedb1caae2eb442bbeae9ab9"},
|
{file = "lxml-4.9.4-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:23d891e5bdc12e2e506e7d225d6aa929e0a0368c9916c1fddefab88166e98b20"},
|
||||||
{file = "lxml-5.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c52100e2c2dbb0649b90467935c4b0de5528833c76a35ea1a2691ec9f1ee7a1"},
|
{file = "lxml-4.9.4-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:e96a1788f24d03e8d61679f9881a883ecdf9c445a38f9ae3f3f193ab6c591c66"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e99f5507401436fdcc85036a2e7dc2e28d962550afe1cbfc07c40e454256a859"},
|
{file = "lxml-4.9.4-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:5557461f83bb7cc718bc9ee1f7156d50e31747e5b38d79cf40f79ab1447afd2d"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:384aacddf2e5813a36495233b64cb96b1949da72bef933918ba5c84e06af8f0e"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:fdb325b7fba1e2c40b9b1db407f85642e32404131c08480dd652110fc908561b"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a216bf6afaf97c263b56371434e47e2c652d215788396f60477540298218f"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d74d4a3c4b8f7a1f676cedf8e84bcc57705a6d7925e6daef7a1e54ae543a197"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65ab5685d56914b9a2a34d67dd5488b83213d680b0c5d10b47f81da5a16b0b0e"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ac7674d1638df129d9cb4503d20ffc3922bd463c865ef3cb412f2c926108e9a4"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aac0bbd3e8dd2d9c45ceb82249e8bdd3ac99131a32b4d35c8af3cc9db1657179"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:ddd92e18b783aeb86ad2132d84a4b795fc5ec612e3545c1b687e7747e66e2b53"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b369d3db3c22ed14c75ccd5af429086f166a19627e84a8fdade3f8f31426e52a"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2bd9ac6e44f2db368ef8986f3989a4cad3de4cd55dbdda536e253000c801bcc7"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24037349665434f375645fa9d1f5304800cec574d0310f618490c871fd902b3"},
|
{file = "lxml-4.9.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bc354b1393dce46026ab13075f77b30e40b61b1a53e852e99d3cc5dd1af4bc85"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:62d172f358f33a26d6b41b28c170c63886742f5b6772a42b59b4f0fa10526cb1"},
|
{file = "lxml-4.9.4-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:f836f39678cb47c9541f04d8ed4545719dc31ad850bf1832d6b4171e30d65d23"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:c1f794c02903c2824fccce5b20c339a1a14b114e83b306ff11b597c5f71a1c8d"},
|
{file = "lxml-4.9.4-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:9c131447768ed7bc05a02553d939e7f0e807e533441901dd504e217b76307745"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:5d6a6972b93c426ace71e0be9a6f4b2cfae9b1baed2eed2006076a746692288c"},
|
{file = "lxml-4.9.4-cp36-cp36m-win32.whl", hash = "sha256:bafa65e3acae612a7799ada439bd202403414ebe23f52e5b17f6ffc2eb98c2be"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3879cc6ce938ff4eb4900d901ed63555c778731a96365e53fadb36437a131a99"},
|
{file = "lxml-4.9.4-cp36-cp36m-win_amd64.whl", hash = "sha256:6197c3f3c0b960ad033b9b7d611db11285bb461fc6b802c1dd50d04ad715c225"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:74068c601baff6ff021c70f0935b0c7bc528baa8ea210c202e03757c68c5a4ff"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:7b378847a09d6bd46047f5f3599cdc64fcb4cc5a5a2dd0a2af610361fbe77b16"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ecd4ad8453ac17bc7ba3868371bffb46f628161ad0eefbd0a855d2c8c32dd81a"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:1343df4e2e6e51182aad12162b23b0a4b3fd77f17527a78c53f0f23573663545"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7e2f58095acc211eb9d8b5771bf04df9ff37d6b87618d1cbf85f92399c98dae8"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6dbdacf5752fbd78ccdb434698230c4f0f95df7dd956d5f205b5ed6911a1367c"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e63601ad5cd8f860aa99d109889b5ac34de571c7ee902d6812d5d9ddcc77fa7d"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:506becdf2ecaebaf7f7995f776394fcc8bd8a78022772de66677c84fb02dd33d"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-win32.whl", hash = "sha256:17e8d968d04a37c50ad9c456a286b525d78c4a1c15dd53aa46c1d8e06bf6fa30"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ca8e44b5ba3edb682ea4e6185b49661fc22b230cf811b9c13963c9f982d1d964"},
|
||||||
{file = "lxml-5.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:c1a69e58a6bb2de65902051d57fde951febad631a20a64572677a1052690482f"},
|
{file = "lxml-4.9.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:9d9d5726474cbbef279fd709008f91a49c4f758bec9c062dfbba88eab00e3ff9"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c72e9563347c7395910de6a3100a4840a75a6f60e05af5e58566868d5eb2d6a"},
|
{file = "lxml-4.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:bbdd69e20fe2943b51e2841fc1e6a3c1de460d630f65bde12452d8c97209464d"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e92ce66cd919d18d14b3856906a61d3f6b6a8500e0794142338da644260595cd"},
|
{file = "lxml-4.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8671622256a0859f5089cbe0ce4693c2af407bc053dcc99aadff7f5310b4aa02"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d04f064bebdfef9240478f7a779e8c5dc32b8b7b0b2fc6a62e39b928d428e51"},
|
{file = "lxml-4.9.4-cp37-cp37m-win32.whl", hash = "sha256:dd4fda67f5faaef4f9ee5383435048ee3e11ad996901225ad7615bc92245bc8e"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2fb570d7823c2bbaf8b419ba6e5662137f8166e364a8b2b91051a1fb40ab8b"},
|
{file = "lxml-4.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6bee9c2e501d835f91460b2c904bc359f8433e96799f5c2ff20feebd9bb1e590"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c120f43553ec759f8de1fee2f4794452b0946773299d44c36bfe18e83caf002"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:1f10f250430a4caf84115b1e0f23f3615566ca2369d1962f82bef40dd99cd81a"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:562e7494778a69086f0312ec9689f6b6ac1c6b65670ed7d0267e49f57ffa08c4"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:3b505f2bbff50d261176e67be24e8909e54b5d9d08b12d4946344066d66b3e43"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:423b121f7e6fa514ba0c7918e56955a1d4470ed35faa03e3d9f0e3baa4c7e492"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:1449f9451cd53e0fd0a7ec2ff5ede4686add13ac7a7bfa6988ff6d75cff3ebe2"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c00f323cc00576df6165cc9d21a4c21285fa6b9989c5c39830c3903dc4303ef3"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:4ece9cca4cd1c8ba889bfa67eae7f21d0d1a2e715b4d5045395113361e8c533d"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:1fdc9fae8dd4c763e8a31e7630afef517eab9f5d5d31a278df087f307bf601f4"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:59bb5979f9941c61e907ee571732219fa4774d5a18f3fa5ff2df963f5dfaa6bc"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:658f2aa69d31e09699705949b5fc4719cbecbd4a97f9656a232e7d6c7be1a367"},
|
{file = "lxml-4.9.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b1980dbcaad634fe78e710c8587383e6e3f61dbe146bcbfd13a9c8ab2d7b1192"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1473427aff3d66a3fa2199004c3e601e6c4500ab86696edffdbc84954c72d832"},
|
{file = "lxml-4.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9ae6c3363261021144121427b1552b29e7b59de9d6a75bf51e03bc072efb3c37"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a87de7dd873bf9a792bf1e58b1c3887b9264036629a5bf2d2e6579fe8e73edff"},
|
{file = "lxml-4.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bcee502c649fa6351b44bb014b98c09cb00982a475a1912a9881ca28ab4f9cd9"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0d7b36afa46c97875303a94e8f3ad932bf78bace9e18e603f2085b652422edcd"},
|
{file = "lxml-4.9.4-cp38-cp38-win32.whl", hash = "sha256:a8edae5253efa75c2fc79a90068fe540b197d1c7ab5803b800fccfe240eed33c"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cf120cce539453ae086eacc0130a324e7026113510efa83ab42ef3fcfccac7fb"},
|
{file = "lxml-4.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:701847a7aaefef121c5c0d855b2affa5f9bd45196ef00266724a80e439220e46"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:df5c7333167b9674aa8ae1d4008fa4bc17a313cc490b2cca27838bbdcc6bb15b"},
|
{file = "lxml-4.9.4-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:f610d980e3fccf4394ab3806de6065682982f3d27c12d4ce3ee46a8183d64a6a"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-win32.whl", hash = "sha256:c802e1c2ed9f0c06a65bc4ed0189d000ada8049312cfeab6ca635e39c9608957"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:aa9b5abd07f71b081a33115d9758ef6077924082055005808f68feccb27616bd"},
|
||||||
{file = "lxml-5.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:406246b96d552e0503e17a1006fd27edac678b3fcc9f1be71a2f94b4ff61528d"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:365005e8b0718ea6d64b374423e870648ab47c3a905356ab6e5a5ff03962b9a9"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8f0de2d390af441fe8b2c12626d103540b5d850d585b18fcada58d972b74a74e"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:16b9ec51cc2feab009e800f2c6327338d6ee4e752c76e95a35c4465e80390ccd"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1afe0a8c353746e610bd9031a630a95bcfb1a720684c3f2b36c4710a0a96528f"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:a905affe76f1802edcac554e3ccf68188bea16546071d7583fb1b693f9cf756b"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56b9861a71575f5795bde89256e7467ece3d339c9b43141dbdd54544566b3b94"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd814847901df6e8de13ce69b84c31fc9b3fb591224d6762d0b256d510cbf382"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:9fb81d2824dff4f2e297a276297e9031f46d2682cafc484f49de182aa5e5df99"},
|
{file = "lxml-4.9.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91bbf398ac8bb7d65a5a52127407c05f75a18d7015a270fdd94bbcb04e65d573"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2c226a06ecb8cdef28845ae976da407917542c5e6e75dcac7cc33eb04aaeb237"},
|
{file = "lxml-4.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f99768232f036b4776ce419d3244a04fe83784bce871b16d2c2e984c7fcea847"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:7d3d1ca42870cdb6d0d29939630dbe48fa511c203724820fc0fd507b2fb46577"},
|
{file = "lxml-4.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bb5bd6212eb0edfd1e8f254585290ea1dadc3687dd8fd5e2fd9a87c31915cdab"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-win32.whl", hash = "sha256:094cb601ba9f55296774c2d57ad68730daa0b13dc260e1f941b4d13678239e70"},
|
{file = "lxml-4.9.4-cp39-cp39-win32.whl", hash = "sha256:88f7c383071981c74ec1998ba9b437659e4fd02a3c4a4d3efc16774eb108d0ec"},
|
||||||
{file = "lxml-5.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:eafa2c8658f4e560b098fe9fc54539f86528651f61849b22111a9b107d18910c"},
|
{file = "lxml-4.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:936e8880cc00f839aa4173f94466a8406a96ddce814651075f95837316369899"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb83f8a875b3d9b458cada4f880fa498646874ba4011dc974e071a0a84a1b033"},
|
{file = "lxml-4.9.4-pp310-pypy310_pp73-macosx_11_0_x86_64.whl", hash = "sha256:f6c35b2f87c004270fa2e703b872fcc984d714d430b305145c39d53074e1ffe0"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25f1b69d41656b05885aa185f5fdf822cb01a586d1b32739633679699f220391"},
|
{file = "lxml-4.9.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:606d445feeb0856c2b424405236a01c71af7c97e5fe42fbc778634faef2b47e4"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23e0553b8055600b3bf4a00b255ec5c92e1e4aebf8c2c09334f8368e8bd174d6"},
|
{file = "lxml-4.9.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1bdcbebd4e13446a14de4dd1825f1e778e099f17f79718b4aeaf2403624b0f7"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ada35dd21dc6c039259596b358caab6b13f4db4d4a7f8665764d616daf9cc1d"},
|
{file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:0a08c89b23117049ba171bf51d2f9c5f3abf507d65d016d6e0fa2f37e18c0fc5"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:81b4e48da4c69313192d8c8d4311e5d818b8be1afe68ee20f6385d0e96fc9512"},
|
{file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:232fd30903d3123be4c435fb5159938c6225ee8607b635a4d3fca847003134ba"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:2bc9fd5ca4729af796f9f59cd8ff160fe06a474da40aca03fcc79655ddee1a8b"},
|
{file = "lxml-4.9.4-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:231142459d32779b209aa4b4d460b175cadd604fed856f25c1571a9d78114771"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07da23d7ee08577760f0a71d67a861019103e4812c87e2fab26b039054594cc5"},
|
{file = "lxml-4.9.4-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:520486f27f1d4ce9654154b4494cf9307b495527f3a2908ad4cb48e4f7ed7ef7"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ea2e2f6f801696ad7de8aec061044d6c8c0dd4037608c7cab38a9a4d316bfb11"},
|
{file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:562778586949be7e0d7435fcb24aca4810913771f845d99145a6cee64d5b67ca"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-win32.whl", hash = "sha256:5c54afdcbb0182d06836cc3d1be921e540be3ebdf8b8a51ee3ef987537455f84"},
|
{file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:a9e7c6d89c77bb2770c9491d988f26a4b161d05c8ca58f63fb1f1b6b9a74be45"},
|
||||||
{file = "lxml-5.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f2901429da1e645ce548bf9171784c0f74f0718c3f6150ce166be39e4dd66c3e"},
|
{file = "lxml-4.9.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:786d6b57026e7e04d184313c1359ac3d68002c33e4b1042ca58c362f1d09ff58"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c56a1d43b2f9ee4786e4658c7903f05da35b923fb53c11025712562d5cc02753"},
|
{file = "lxml-4.9.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:95ae6c5a196e2f239150aa4a479967351df7f44800c93e5a975ec726fef005e2"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee8c39582d2652dcd516d1b879451500f8db3fe3607ce45d7c5957ab2596040"},
|
{file = "lxml-4.9.4-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:9b556596c49fa1232b0fff4b0e69b9d4083a502e60e404b44341e2f8fb7187f5"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdf3a3059611f7585a78ee10399a15566356116a4288380921a4b598d807a22"},
|
{file = "lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:cc02c06e9e320869d7d1bd323df6dd4281e78ac2e7f8526835d3d48c69060683"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:146173654d79eb1fc97498b4280c1d3e1e5d58c398fa530905c9ea50ea849b22"},
|
{file = "lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:857d6565f9aa3464764c2cb6a2e3c2e75e1970e877c188f4aeae45954a314e0c"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0a7056921edbdd7560746f4221dca89bb7a3fe457d3d74267995253f46343f15"},
|
{file = "lxml-4.9.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c42ae7e010d7d6bc51875d768110c10e8a59494855c3d4c348b068f5fb81fdcd"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:9e4b47ac0f5e749cfc618efdf4726269441014ae1d5583e047b452a32e221920"},
|
{file = "lxml-4.9.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f10250bb190fb0742e3e1958dd5c100524c2cc5096c67c8da51233f7448dc137"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f914c03e6a31deb632e2daa881fe198461f4d06e57ac3d0e05bbcab8eae01945"},
|
{file = "lxml-4.9.4.tar.gz", hash = "sha256:b1541e50b78e15fa06a2670157a1962ef06591d4c998b998047fff5e3236880e"},
|
||||||
{file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:213261f168c5e1d9b7535a67e68b1f59f92398dd17a56d934550837143f79c42"},
|
|
||||||
{file = "lxml-5.3.0-cp38-cp38-win32.whl", hash = "sha256:218c1b2e17a710e363855594230f44060e2025b05c80d1f0661258142b2add2e"},
|
|
||||||
{file = "lxml-5.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:315f9542011b2c4e1d280e4a20ddcca1761993dda3afc7a73b01235f8641e903"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ffc23010330c2ab67fac02781df60998ca8fe759e8efde6f8b756a20599c5de"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2b3778cb38212f52fac9fe913017deea2fdf4eb1a4f8e4cfc6b009a13a6d3fcc"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b0c7a688944891086ba192e21c5229dea54382f4836a209ff8d0a660fac06be"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:747a3d3e98e24597981ca0be0fd922aebd471fa99d0043a3842d00cdcad7ad6a"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86a6b24b19eaebc448dc56b87c4865527855145d851f9fc3891673ff97950540"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b11a5d918a6216e521c715b02749240fb07ae5a1fefd4b7bf12f833bc8b4fe70"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b87753c784d6acb8a25b05cb526c3406913c9d988d51f80adecc2b0775d6aa"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:109fa6fede314cc50eed29e6e56c540075e63d922455346f11e4d7a036d2b8cf"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02ced472497b8362c8e902ade23e3300479f4f43e45f4105c85ef43b8db85229"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:6b038cc86b285e4f9fea2ba5ee76e89f21ed1ea898e287dc277a25884f3a7dfe"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:7437237c6a66b7ca341e868cda48be24b8701862757426852c9b3186de1da8a2"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7f41026c1d64043a36fda21d64c5026762d53a77043e73e94b71f0521939cc71"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:482c2f67761868f0108b1743098640fbb2a28a8e15bf3f47ada9fa59d9fe08c3"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1483fd3358963cc5c1c9b122c80606a3a79ee0875bcac0204149fa09d6ff2727"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dec2d1130a9cda5b904696cec33b2cfb451304ba9081eeda7f90f724097300a"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-win32.whl", hash = "sha256:a0eabd0a81625049c5df745209dc7fcef6e2aea7793e5f003ba363610aa0a3ff"},
|
|
||||||
{file = "lxml-5.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:89e043f1d9d341c52bf2af6d02e6adde62e0a46e6755d5eb60dc6e4f0b8aeca2"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7b1cd427cb0d5f7393c31b7496419da594fe600e6fdc4b105a54f82405e6626c"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51806cfe0279e06ed8500ce19479d757db42a30fd509940b1701be9c86a5ff9a"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee70d08fd60c9565ba8190f41a46a54096afa0eeb8f76bd66f2c25d3b1b83005"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:8dc2c0395bea8254d8daebc76dcf8eb3a95ec2a46fa6fae5eaccee366bfe02ce"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6ba0d3dcac281aad8a0e5b14c7ed6f9fa89c8612b47939fc94f80b16e2e9bc83"},
|
|
||||||
{file = "lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:94d6c3782907b5e40e21cadf94b13b0842ac421192f26b84c45f13f3c9d5dc27"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c300306673aa0f3ed5ed9372b21867690a17dba38c68c44b287437c362ce486b"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d9b952e07aed35fe2e1a7ad26e929595412db48535921c5013edc8aa4a35ce"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:01220dca0d066d1349bd6a1726856a78f7929f3878f7e2ee83c296c69495309e"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2d9b8d9177afaef80c53c0a9e30fa252ff3036fb1c6494d427c066a4ce6a282f"},
|
|
||||||
{file = "lxml-5.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:20094fc3f21ea0a8669dc4c61ed7fa8263bd37d97d93b90f28fc613371e7a875"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ace2c2326a319a0bb8a8b0e5b570c764962e95818de9f259ce814ee666603f19"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92e67a0be1639c251d21e35fe74df6bcc40cba445c2cda7c4a967656733249e2"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5350b55f9fecddc51385463a4f67a5da829bc741e38cf689f38ec9023f54ab"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c1fefd7e3d00921c44dc9ca80a775af49698bbfd92ea84498e56acffd4c5469"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:71a8dd38fbd2f2319136d4ae855a7078c69c9a38ae06e0c17c73fd70fc6caad8"},
|
|
||||||
{file = "lxml-5.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:97acf1e1fd66ab53dacd2c35b319d7e548380c2e9e8c54525c6e76d21b1ae3b1"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:68934b242c51eb02907c5b81d138cb977b2129a0a75a8f8b60b01cb8586c7b21"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b710bc2b8292966b23a6a0121f7a6c51d45d2347edcc75f016ac123b8054d3f2"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18feb4b93302091b1541221196a2155aa296c363fd233814fa11e181adebc52f"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:3eb44520c4724c2e1a57c0af33a379eee41792595023f367ba3952a2d96c2aab"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:609251a0ca4770e5a8768ff902aa02bf636339c5a93f9349b48eb1f606f7f3e9"},
|
|
||||||
{file = "lxml-5.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:516f491c834eb320d6c843156440fe7fc0d50b33e44387fcec5b02f0bc118a4c"},
|
|
||||||
{file = "lxml-5.3.0.tar.gz", hash = "sha256:4e109ca30d1edec1ac60cdbe341905dc3b8f55b16855e03a54aaf59e51ec8c6f"},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
cssselect = ["cssselect (>=0.7)"]
|
cssselect = ["cssselect (>=0.7)"]
|
||||||
html-clean = ["lxml-html-clean"]
|
|
||||||
html5 = ["html5lib"]
|
html5 = ["html5lib"]
|
||||||
htmlsoup = ["BeautifulSoup4"]
|
htmlsoup = ["BeautifulSoup4"]
|
||||||
source = ["Cython (>=3.0.11)"]
|
source = ["Cython (==0.29.37)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markdown"
|
name = "markdown"
|
||||||
@ -5761,6 +5715,11 @@ files = [
|
|||||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
|
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
|
||||||
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
|
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
|
||||||
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
|
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
|
||||||
|
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
|
||||||
|
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
|
||||||
|
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
|
||||||
|
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
|
||||||
|
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
|
||||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
|
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
|
||||||
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
|
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
|
||||||
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
|
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
|
||||||
@ -7353,4 +7312,4 @@ tesserocr = ["tesserocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.9"
|
python-versions = "^3.9"
|
||||||
content-hash = "6772098f9951e636c03d958b81f7df3ae0e8746be558fd031aeaa67f9bb45a79"
|
content-hash = "d824c81c132de19beea5d5e395d70c1b9566bf5b938fec3d12ad478b953fd639"
|
||||||
|
@ -48,6 +48,7 @@ beautifulsoup4 = "^4.12.3"
|
|||||||
pandas = "^2.1.4"
|
pandas = "^2.1.4"
|
||||||
marko = "^2.1.2"
|
marko = "^2.1.2"
|
||||||
openpyxl = "^3.1.5"
|
openpyxl = "^3.1.5"
|
||||||
|
lxml = ">=4.0.0,<6.0.0"
|
||||||
ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
|
ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
|
BIN
tests/data/docx/test_emf_docx.docx
Normal file
BIN
tests/data/docx/test_emf_docx.docx
Normal file
Binary file not shown.
10
tests/data/groundtruth/docling_v2/tablecell.docx.itxt
Normal file
10
tests/data/groundtruth/docling_v2/tablecell.docx.itxt
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: list: group list
|
||||||
|
item-2 at level 2: list_item: Hello world1
|
||||||
|
item-3 at level 2: list_item: Hello2
|
||||||
|
item-4 at level 1: paragraph:
|
||||||
|
item-5 at level 1: paragraph: Some text before
|
||||||
|
item-6 at level 1: table with [3x3]
|
||||||
|
item-7 at level 1: paragraph:
|
||||||
|
item-8 at level 1: paragraph:
|
||||||
|
item-9 at level 1: paragraph: Some text after
|
392
tests/data/groundtruth/docling_v2/tablecell.docx.json
Normal file
392
tests/data/groundtruth/docling_v2/tablecell.docx.json
Normal file
@ -0,0 +1,392 @@
|
|||||||
|
{
|
||||||
|
"schema_name": "DoclingDocument",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"name": "tablecell",
|
||||||
|
"origin": {
|
||||||
|
"mimetype": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
"binary_hash": 1111850039819445035,
|
||||||
|
"filename": "tablecell.docx"
|
||||||
|
},
|
||||||
|
"furniture": {
|
||||||
|
"self_ref": "#/furniture",
|
||||||
|
"children": [],
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"body": {
|
||||||
|
"self_ref": "#/body",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/tables/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/5"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"groups": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"name": "list",
|
||||||
|
"label": "list"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"texts": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Hello world1",
|
||||||
|
"text": "Hello world1",
|
||||||
|
"enumerated": false,
|
||||||
|
"marker": "-"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "list_item",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Hello2",
|
||||||
|
"text": "Hello2",
|
||||||
|
"enumerated": false,
|
||||||
|
"marker": "-"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/2",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "paragraph",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "",
|
||||||
|
"text": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/3",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "paragraph",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Some text before",
|
||||||
|
"text": "Some text before"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/4",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "paragraph",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "",
|
||||||
|
"text": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/5",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "paragraph",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "",
|
||||||
|
"text": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/6",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "paragraph",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Some text after",
|
||||||
|
"text": "Some text after"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pictures": [],
|
||||||
|
"tables": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/tables/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"label": "table",
|
||||||
|
"prov": [],
|
||||||
|
"captions": [],
|
||||||
|
"references": [],
|
||||||
|
"footnotes": [],
|
||||||
|
"data": {
|
||||||
|
"table_cells": [
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "Tab1",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "Tab2",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "Tab3",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "A",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "B",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "C",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "D",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "E",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "F",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"num_rows": 3,
|
||||||
|
"num_cols": 3,
|
||||||
|
"grid": [
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "Tab1",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "Tab2",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 0,
|
||||||
|
"end_row_offset_idx": 1,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "Tab3",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "A",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "B",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 1,
|
||||||
|
"end_row_offset_idx": 2,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "C",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 0,
|
||||||
|
"end_col_offset_idx": 1,
|
||||||
|
"text": "D",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 1,
|
||||||
|
"end_col_offset_idx": 2,
|
||||||
|
"text": "E",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"row_span": 1,
|
||||||
|
"col_span": 1,
|
||||||
|
"start_row_offset_idx": 2,
|
||||||
|
"end_row_offset_idx": 3,
|
||||||
|
"start_col_offset_idx": 2,
|
||||||
|
"end_col_offset_idx": 3,
|
||||||
|
"text": "F",
|
||||||
|
"column_header": false,
|
||||||
|
"row_header": false,
|
||||||
|
"row_section": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"key_value_items": [],
|
||||||
|
"pages": {}
|
||||||
|
}
|
11
tests/data/groundtruth/docling_v2/tablecell.docx.md
Normal file
11
tests/data/groundtruth/docling_v2/tablecell.docx.md
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
- Hello world1
|
||||||
|
- Hello2
|
||||||
|
|
||||||
|
Some text before
|
||||||
|
|
||||||
|
| Tab1 | Tab2 | Tab3 |
|
||||||
|
|--------|--------|--------|
|
||||||
|
| A | B | C |
|
||||||
|
| D | E | F |
|
||||||
|
|
||||||
|
Some text after
|
@ -0,0 +1,8 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: paragraph: Test with three images in unusual formats
|
||||||
|
item-2 at level 1: paragraph: Raster in emf:
|
||||||
|
item-3 at level 1: picture
|
||||||
|
item-4 at level 1: paragraph: Vector in emf:
|
||||||
|
item-5 at level 1: picture
|
||||||
|
item-6 at level 1: paragraph: Raster in webp:
|
||||||
|
item-7 at level 1: picture
|
144
tests/data/groundtruth/docling_v2/test_emf_docx.docx.json
Normal file
144
tests/data/groundtruth/docling_v2/test_emf_docx.docx.json
Normal file
File diff suppressed because one or more lines are too long
13
tests/data/groundtruth/docling_v2/test_emf_docx.docx.md
Normal file
13
tests/data/groundtruth/docling_v2/test_emf_docx.docx.md
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
Test with three images in unusual formats
|
||||||
|
|
||||||
|
Raster in emf:
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
Vector in emf:
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
Raster in webp:
|
||||||
|
|
||||||
|
<!-- image -->
|
1
tests/data/groundtruth/docling_v2/word_sample.json
Normal file
1
tests/data/groundtruth/docling_v2/word_sample.json
Normal file
File diff suppressed because one or more lines are too long
45
tests/data/groundtruth/docling_v2/word_sample.md
Normal file
45
tests/data/groundtruth/docling_v2/word_sample.md
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
Summer activities
|
||||||
|
|
||||||
|
# Swimming in the lake
|
||||||
|
|
||||||
|
Duck
|
||||||
|
|
||||||
|
<!-- image -->
|
||||||
|
|
||||||
|
Figure 1: This is a cute duckling
|
||||||
|
|
||||||
|
## Let’s swim!
|
||||||
|
|
||||||
|
To get started with swimming, first lay down in a water and try not to drown:
|
||||||
|
|
||||||
|
- You can relax and look around
|
||||||
|
- Paddle about
|
||||||
|
- Enjoy summer warmth
|
||||||
|
|
||||||
|
Also, don’t forget:
|
||||||
|
|
||||||
|
- Wear sunglasses
|
||||||
|
- Don’t forget to drink water
|
||||||
|
- Use sun cream
|
||||||
|
|
||||||
|
Hmm, what else…
|
||||||
|
|
||||||
|
### Let’s eat
|
||||||
|
|
||||||
|
After we had a good day of swimming in the lake, it’s important to eat something nice
|
||||||
|
|
||||||
|
I like to eat leaves
|
||||||
|
|
||||||
|
Here are some interesting things a respectful duck could eat:
|
||||||
|
|
||||||
|
| | Food | Calories per portion |
|
||||||
|
|---------|----------------------------------|------------------------|
|
||||||
|
| Leaves | Ash, Elm, Maple | 50 |
|
||||||
|
| Berries | Blueberry, Strawberry, Cranberry | 150 |
|
||||||
|
| Grain | Corn, Buckwheat, Barley | 200 |
|
||||||
|
|
||||||
|
And let’s add another list in the end:
|
||||||
|
|
||||||
|
- Leaves
|
||||||
|
- Berries
|
||||||
|
- Grain
|
546
tests/data/groundtruth/docling_v2/word_sample.yaml
Normal file
546
tests/data/groundtruth/docling_v2/word_sample.yaml
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user