Docling/tests/data_scanned/groundtruth/docling_v1/ocr_test.json
Maras Ioannis e979750ce9
fix(tesseract): initialize df_osd to avoid uninitialized variable error (#1718)
* fix: initialize df_osd to avoid uninitialized variable error

Signed-off-by: IoannisMaras <maras2002@gmail.com>

* Fix formatting

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Satisfy mypy, regenerate OCR tests

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: IoannisMaras <maras2002@gmail.com>
Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
2025-06-10 10:57:45 +02:00

83 lines
1.8 KiB
JSON
Vendored

{
"_name": "",
"type": "pdf-document",
"description": {
"title": null,
"abstract": null,
"authors": null,
"affiliations": null,
"subjects": null,
"keywords": null,
"publication_date": null,
"languages": null,
"license": null,
"publishers": null,
"url_refs": null,
"references": null,
"publication": null,
"reference_count": null,
"citation_count": null,
"citation_date": null,
"advanced": null,
"analytics": null,
"logs": [],
"collection": null,
"acquisition": null
},
"file-info": {
"filename": "ocr_test.pdf",
"filename-prov": null,
"document-hash": "80f38f5b87a84870681556176a9622186fd200dd32c5557be9e0c0af05b8bc61",
"#-pages": 1,
"collection-name": null,
"description": null,
"page-hashes": [
{
"hash": "14d896dc8bcb7ee7c08c0347eb6be8dcb92a3782501992f1ea14d2e58077d4e3",
"model": "default",
"page": 1
}
]
},
"main-text": [
{
"prov": [
{
"bbox": [
69.6796630536824,
689.0124221922704,
504.8720051760782,
764.9216921155637
],
"page": 1,
"span": [
0,
94
],
"__ref_s3_data": null
}
],
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
}
],
"figures": [],
"tables": [],
"bitmaps": null,
"equations": [],
"footnotes": [],
"page-dimensions": [
{
"height": 841.9216918945312,
"page": 1,
"width": 595.201171875
}
],
"page-footers": [],
"page-headers": [],
"_s3_data": null,
"identifiers": null
}