fix(tesseract): initialize df_osd to avoid uninitialized variable error (#1718)

* fix: initialize df_osd to avoid uninitialized variable error

Signed-off-by: IoannisMaras <maras2002@gmail.com>

* Fix formatting

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Satisfy mypy, regenerate OCR tests

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: IoannisMaras <maras2002@gmail.com>
Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Maras Ioannis 2025-06-10 11:57:45 +03:00 committed by GitHub
parent f7f31137f1
commit e979750ce9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
20 changed files with 752 additions and 751 deletions

View File

@ -99,12 +99,12 @@ class TesseractOcrCliModel(BaseOcrModel):
return name, version return name, version
def _run_tesseract(self, ifilename: str, osd: pd.DataFrame): def _run_tesseract(self, ifilename: str, osd: Optional[pd.DataFrame]):
r""" r"""
Run tesseract CLI Run tesseract CLI
""" """
cmd = [self.options.tesseract_cmd] cmd = [self.options.tesseract_cmd]
if self._is_auto: if self._is_auto and osd is not None:
lang = self._parse_language(osd) lang = self._parse_language(osd)
if lang is not None: if lang is not None:
cmd.append("-l") cmd.append("-l")
@ -231,6 +231,7 @@ class TesseractOcrCliModel(BaseOcrModel):
fname = image_file.name fname = image_file.name
high_res_image.save(image_file) high_res_image.save(image_file)
doc_orientation = 0 doc_orientation = 0
df_osd: Optional[pd.DataFrame] = None
try: try:
df_osd = self._perform_osd(fname) df_osd = self._perform_osd(fname)
doc_orientation = _parse_orientation(df_osd) doc_orientation = _parse_orientation(df_osd)

View File

@ -44,9 +44,9 @@
"prov": [ "prov": [
{ {
"bbox": [ "bbox": [
70.90211866351085, 69.6796630536824,
689.216658542347, 689.0124221922704,
504.8720079864275, 504.8720051760782,
764.9216921155637 764.9216921155637
], ],
"page": 1, "page": 1,

View File

@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -157,14 +157,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -195,10 +195,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -237,14 +237,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -262,14 +262,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -293,10 +293,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -335,14 +335,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -360,14 +360,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",

View File

@ -44,10 +44,10 @@
"prov": [ "prov": [
{ {
"bbox": [ "bbox": [
441.304584329099, 441.2561096985719,
132.09610360960653, 131.89488404865142,
521.9863114205704, 522.0347860494834,
151.67751306395223 151.87873262042876
], ],
"page": 1, "page": 1,
"span": [ "span": [
@ -67,10 +67,10 @@
"prov": [ "prov": [
{ {
"bbox": [ "bbox": [
89.12133215549848, 89.23887497045128,
77.02339849621205, 77.02339852098021,
523.3501733013318, 523.208764293368,
124.86176457554109 124.75312428291147
], ],
"page": 1, "page": 1,
"span": [ "span": [

View File

@ -15,14 +15,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -107,14 +107,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -155,10 +155,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -172,14 +172,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -227,14 +227,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -281,10 +281,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -298,14 +298,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -329,10 +329,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -346,14 +346,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -371,14 +371,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -400,10 +400,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -417,14 +417,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",

View File

@ -44,10 +44,10 @@
"prov": [ "prov": [
{ {
"bbox": [ "bbox": [
691.4680194659409, 690.2441821046808,
442.3948768148814, 442.39487414368364,
709.8255850278712, 709.8255852011977,
523.0765988200898 523.076601235155
], ],
"page": 1, "page": 1,
"span": [ "span": [

View File

@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -155,10 +155,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -172,14 +172,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -281,10 +281,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -298,14 +298,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -329,10 +329,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -346,14 +346,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -377,10 +377,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -419,14 +419,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",

View File

@ -15,14 +15,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -107,14 +107,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -227,14 +227,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -377,10 +377,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -394,14 +394,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -419,14 +419,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",

View File

@ -1,2 +1,2 @@
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text> <doctag><text><loc_59><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
</doctag> </doctag>

View File

@ -42,10 +42,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 764.9216921155637, "t": 764.9216921155637,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 689.216658542347, "b": 689.0124221922704,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [

View File

@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -157,14 +157,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -195,10 +195,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -237,14 +237,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -262,14 +262,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -293,10 +293,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 70.90211866351085, "l": 69.6796630536824,
"t": 76.99999977896756, "t": 76.99999977896756,
"r": 504.8720079864275, "r": 504.8720051760782,
"b": 152.70503335218433, "b": 152.90926970226084,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.9715733528137207, "confidence": 0.9715733528137207,
@ -335,14 +335,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 70.90211866351085, "r_x0": 69.6796630536824,
"r_y0": 124.83139551297342, "r_y0": 124.83139494707741,
"r_x1": 504.8720079864275, "r_x1": 504.8720051760782,
"r_y1": 124.83139551297342, "r_y1": 124.83139494707741,
"r_x2": 504.8720079864275, "r_x2": 504.8720051760782,
"r_y2": 102.66666671251768, "r_y2": 104.00000011573796,
"r_x3": 70.90211866351085, "r_x3": 69.6796630536824,
"r_y3": 102.66666671251768, "r_y3": 104.00000011573796,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -360,14 +360,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 73.10852522817731, "r_x0": 71.84193505100733,
"r_y0": 152.70503335218433, "r_y0": 152.90926970226084,
"r_x1": 153.04479435252625, "r_x1": 153.088934155825,
"r_y1": 152.70503335218433, "r_y1": 152.90926970226084,
"r_x2": 153.04479435252625, "r_x2": 153.088934155825,
"r_y2": 130.00136157890958, "r_y2": 129.797125232046,
"r_x3": 73.10852522817731, "r_x3": 71.84193505100733,
"r_y3": 130.00136157890958, "r_y3": 129.797125232046,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",

View File

@ -1,3 +1,3 @@
<doctag><text><loc_371><loc_410><loc_438><loc_422>package</text> <doctag><text><loc_371><loc_410><loc_439><loc_422>package</text>
<text><loc_75><loc_426><loc_440><loc_454>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</text> <text><loc_75><loc_426><loc_440><loc_454>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</text>
</doctag> </doctag>

View File

@ -45,10 +45,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 151.67751306395223, "t": 151.87873262042876,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 132.09610360960653, "b": 131.89488404865142,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [
@ -74,10 +74,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 124.86176457554109, "t": 124.75312428291147,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 77.02339849621205, "b": 77.02339852098021,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [

View File

@ -15,14 +15,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -107,14 +107,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -155,10 +155,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -172,14 +172,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -227,14 +227,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -281,10 +281,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -298,14 +298,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -329,10 +329,10 @@
"id": 0, "id": 0,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 89.12133215549848, "l": 89.23887497045128,
"t": 717.0599273189902, "t": 717.1685676116198,
"r": 523.3501733013318, "r": 523.208764293368,
"b": 764.8982933983192, "b": 764.898293373551,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.7318570613861084, "confidence": 0.7318570613861084,
@ -346,14 +346,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 90.46133071208328, "r_x0": 89.2388782764286,
"r_y0": 764.8982933983192, "r_y0": 764.898293373551,
"r_x1": 520.7638616365624, "r_x1": 521.9863147998661,
"r_y1": 764.8982933983192, "r_y1": 764.898293373551,
"r_x2": 520.7638616365624, "r_x2": 521.9863147998661,
"r_y2": 744.0929853742306, "r_y2": 744.0929853494625,
"r_x3": 90.46133071208328, "r_x3": 89.2388782764286,
"r_y3": 744.0929853742306, "r_y3": 744.0929853494625,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -371,14 +371,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 89.12133215549848, "r_x0": 89.23887497045128,
"r_y0": 741.5247710689902, "r_y0": 739.1977118987292,
"r_x1": 523.3501733013318, "r_x1": 523.208764293368,
"r_y1": 741.5247710689902, "r_y1": 739.1977118987292,
"r_x2": 523.3501733013318, "r_x2": 523.208764293368,
"r_y2": 717.0599273189902, "r_y2": 717.1685676116198,
"r_x3": 89.12133215549848, "r_x3": 89.23887497045128,
"r_y3": 717.0599273189902, "r_y3": 717.1685676116198,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -400,10 +400,10 @@
"id": 2, "id": 2,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 441.304584329099, "l": 441.2561096985719,
"t": 690.244178830579, "t": 690.0429592741025,
"r": 521.9863114205704, "r": 522.0347860494834,
"b": 709.8255882849247, "b": 710.0268078458798,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.5982133150100708, "confidence": 0.5982133150100708,
@ -417,14 +417,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 441.304584329099, "r_x0": 441.2561096985719,
"r_y0": 709.8255882849247, "r_y0": 710.0268078458798,
"r_x1": 521.9863114205704, "r_x1": 522.0347860494834,
"r_y1": 709.8255882849247, "r_y1": 710.0268078458798,
"r_x2": 521.9863114205704, "r_x2": 522.0347860494834,
"r_y2": 690.244178830579, "r_y2": 690.0429592741025,
"r_x3": 441.304584329099, "r_x3": 441.2561096985719,
"r_y3": 690.244178830579, "r_y3": 690.0429592741025,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",

View File

@ -1,3 +1,3 @@
<doctag><page_header><loc_426><loc_60><loc_454><loc_424>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header> <doctag><page_header><loc_426><loc_60><loc_454><loc_424>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
<text><loc_411><loc_61><loc_422><loc_128>package</text> <text><loc_410><loc_61><loc_422><loc_128>package</text>
</doctag> </doctag>

View File

@ -45,10 +45,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 524.2990548540179, "t": 524.2990550512769,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 90.32916553110118, "b": 90.3291657283603,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [
@ -74,10 +74,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 523.0765988200898, "t": 523.076601235155,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 442.3948768148814, "b": 442.39487414368364,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [

View File

@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -155,10 +155,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -172,14 +172,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -281,10 +281,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -298,14 +298,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -329,10 +329,10 @@
"id": 8, "id": 8,
"label": "text", "label": "text",
"bbox": { "bbox": {
"l": 691.4680194659409, "l": 690.2441821046808,
"t": 72.12457305491027, "t": 72.124570639845,
"r": 709.8255850278712, "r": 709.8255852011977,
"b": 152.80629506011857, "b": 152.80629773131633,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 1.0, "confidence": 1.0,
@ -346,14 +346,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 691.4680194659409, "r_x0": 690.2441821046808,
"r_y0": 152.80629506011857, "r_y0": 152.80629773131633,
"r_x1": 709.8255850278712, "r_x1": 709.8255852011977,
"r_y1": 152.80629506011857, "r_y1": 152.80629773131633,
"r_x2": 709.8255850278712, "r_x2": 709.8255852011977,
"r_y2": 72.12457305491027, "r_y2": 72.124570639845,
"r_x3": 691.4680194659409, "r_x3": 690.2441821046808,
"r_y3": 72.12457305491027, "r_y3": 72.124570639845,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "package", "text": "package",
@ -377,10 +377,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 717.1685859527342, "l": 717.168585936602,
"t": 70.90211702098213, "t": 70.90211682372312,
"r": 764.8982839673505, "r": 764.8982839673505,
"b": 504.8720063438988, "b": 504.8720061466397,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6915205121040344, "confidence": 0.6915205121040344,
@ -419,14 +419,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 717.1685859527342, "r_x0": 717.168585936602,
"r_y0": 504.8720063438988, "r_y0": 504.8720061466397,
"r_x1": 737.9738558298501, "r_x1": 737.9738558137178,
"r_y1": 504.8720063438988, "r_y1": 504.8720061466397,
"r_x2": 737.9738558298501, "r_x2": 737.9738558137178,
"r_y2": 70.90211702098213, "r_y2": 70.90211682372312,
"r_x3": 717.1685859527342, "r_x3": 717.168585936602,
"r_y3": 70.90211702098213, "r_y3": 70.90211682372312,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",

View File

@ -1,3 +1,3 @@
<doctag><page_header><loc_46><loc_75><loc_74><loc_440>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header> <doctag><page_header><loc_46><loc_75><loc_75><loc_440>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
<text><loc_78><loc_370><loc_90><loc_438>package</text> <text><loc_78><loc_370><loc_90><loc_438>package</text>
</doctag> </doctag>

View File

@ -45,10 +45,10 @@
{ {
"page_no": 1, "page_no": 1,
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 506.07735421856773, "t": 506.0744964609271,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 71.88562244773436, "b": 71.87755635676046,
"coord_origin": "BOTTOMLEFT" "coord_origin": "BOTTOMLEFT"
}, },
"charspan": [ "charspan": [

View File

@ -15,14 +15,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -40,14 +40,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -90,10 +90,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -107,14 +107,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -132,14 +132,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -210,10 +210,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -227,14 +227,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -252,14 +252,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",
@ -377,10 +377,10 @@
"id": 0, "id": 0,
"label": "page_header", "label": "page_header",
"bbox": { "bbox": {
"l": 77.10171546422428, "l": 77.10171545548258,
"t": 89.12381765643227, "t": 89.1266754140729,
"r": 124.91101654503161, "r": 126.08064862014129,
"b": 523.3155494272656, "b": 523.3236155182395,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"confidence": 0.6016772389411926, "confidence": 0.6016772389411926,
@ -394,14 +394,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 77.10171546422428, "r_x0": 77.10171545548258,
"r_y0": 520.7638577050515, "r_y0": 520.7638571913312,
"r_x1": 96.6831586150625, "r_x1": 96.68315797053792,
"r_y1": 520.7638577050515, "r_y1": 520.7638571913312,
"r_x2": 96.6831586150625, "r_x2": 96.68315797053792,
"r_y2": 89.23887398109309, "r_y2": 89.2388734673729,
"r_x3": 77.10171546422428, "r_x3": 77.10171545548258,
"r_y3": 89.23887398109309, "r_y3": 89.2388734673729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "Docling bundles PDF document conversion to", "text": "Docling bundles PDF document conversion to",
@ -419,14 +419,14 @@
"a": 255 "a": 255
}, },
"rect": { "rect": {
"r_x0": 100.55299576256091, "r_x0": 100.64168123325977,
"r_y0": 523.3155494272656, "r_y0": 523.3236155182395,
"r_x1": 124.91101654503161, "r_x1": 126.08064862014129,
"r_y1": 523.3155494272656, "r_y1": 523.3236155182395,
"r_x2": 124.91101654503161, "r_x2": 126.08064862014129,
"r_y2": 89.12381765643227, "r_y2": 89.1266754140729,
"r_x3": 100.55299576256091, "r_x3": 100.64168123325977,
"r_y3": 89.12381765643227, "r_y3": 89.1266754140729,
"coord_origin": "TOPLEFT" "coord_origin": "TOPLEFT"
}, },
"text": "JSON and Markdown in an easy self contained", "text": "JSON and Markdown in an easy self contained",