feat(ocr): auto-detect rotated pages in Tesseract (#1167)

* fix(ocr): tesseract support mis-oriented documents

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): update missing test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): rotate image to the natural orientation before layout prediction

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): move bounding box rotation util to orientation.py

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): refactor rotation utilities

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): avoid swallowing tesseract errors causing orientation detection failures

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrCliModel`

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrModel`

* chore(ocr): default `TesseractOcrCliModel._is_auto` to `False`

* fix(ocr): fix `TesseractOcrCliModel._is_auto` computation

* chore(ocr): improve logging in case of OSD failure in `TesseractOcrCliModel` and `TesseractOcrModel`

---------

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
Clément Doumouro 2025-05-21 18:12:33 +02:00 committed by GitHub
parent 90875247e5
commit 45265bf8b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
96 changed files with 9864 additions and 5258 deletions

View File

@ -2,6 +2,7 @@ import csv
import io
import logging
import os
import subprocess
import tempfile
from collections.abc import Iterable
from pathlib import Path
@ -10,7 +11,7 @@ from typing import List, Optional, Tuple, Type
import pandas as pd
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle, TextCell
from docling_core.types.doc.page import TextCell
from docling.datamodel.base_models import Page
from docling.datamodel.document import ConversionResult
@ -21,7 +22,11 @@ from docling.datamodel.pipeline_options import (
)
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.ocr_utils import map_tesseract_script
from docling.utils.ocr_utils import (
map_tesseract_script,
parse_tesseract_orientation,
tesseract_box_to_bounding_rectangle,
)
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__)
@ -49,6 +54,7 @@ class TesseractOcrCliModel(BaseOcrModel):
self._version: Optional[str] = None
self._tesseract_languages: Optional[List[str]] = None
self._script_prefix: Optional[str] = None
self._is_auto: bool = "auto" in self.options.lang
if self.enabled:
try:
@ -93,14 +99,13 @@ class TesseractOcrCliModel(BaseOcrModel):
return name, version
def _run_tesseract(self, ifilename: str):
def _run_tesseract(self, ifilename: str, osd: pd.DataFrame):
r"""
Run tesseract CLI
"""
cmd = [self.options.tesseract_cmd]
if "auto" in self.options.lang:
lang = self._detect_language(ifilename)
if self._is_auto:
lang = self._parse_language(osd)
if lang is not None:
cmd.append("-l")
cmd.append(lang)
@ -115,13 +120,12 @@ class TesseractOcrCliModel(BaseOcrModel):
cmd += [ifilename, "stdout", "tsv"]
_log.info("command: {}".format(" ".join(cmd)))
proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
output, _ = proc.communicate()
output = subprocess.run(cmd, stdout=PIPE, stderr=DEVNULL, check=True)
# _log.info(output)
# Decode the byte string to a regular string
decoded_data = output.decode("utf-8")
decoded_data = output.stdout.decode("utf-8")
# _log.info(decoded_data)
# Read the TSV file generated by Tesseract
@ -139,22 +143,24 @@ class TesseractOcrCliModel(BaseOcrModel):
return df_filtered
def _detect_language(self, ifilename: str):
def _perform_osd(self, ifilename: str) -> pd.DataFrame:
r"""
Run tesseract in PSM 0 mode to detect the language
"""
assert self._tesseract_languages is not None
cmd = [self.options.tesseract_cmd]
cmd.extend(["--psm", "0", "-l", "osd", ifilename, "stdout"])
_log.info("command: {}".format(" ".join(cmd)))
proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
output, _ = proc.communicate()
decoded_data = output.decode("utf-8")
output = subprocess.run(cmd, capture_output=True, check=True)
decoded_data = output.stdout.decode("utf-8")
df_detected = pd.read_csv(
io.StringIO(decoded_data), sep=":", header=None, names=["key", "value"]
)
scripts = df_detected.loc[df_detected["key"] == "Script"].value.tolist()
return df_detected
def _parse_language(self, df_osd: pd.DataFrame) -> Optional[str]:
assert self._tesseract_languages is not None
scripts = df_osd.loc[df_osd["key"] == "Script"].value.tolist()
if len(scripts) == 0:
_log.warning("Tesseract cannot detect the script of the page")
return None
@ -182,9 +188,8 @@ class TesseractOcrCliModel(BaseOcrModel):
cmd = [self.options.tesseract_cmd]
cmd.append("--list-langs")
_log.info("command: {}".format(" ".join(cmd)))
proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL)
output, _ = proc.communicate()
decoded_data = output.decode("utf-8")
output = subprocess.run(cmd, stdout=PIPE, stderr=DEVNULL, check=True)
decoded_data = output.stdout.decode("utf-8")
df_list = pd.read_csv(io.StringIO(decoded_data), header=None)
self._tesseract_languages = df_list[0].tolist()[1:]
@ -203,7 +208,7 @@ class TesseractOcrCliModel(BaseOcrModel):
yield from page_batch
return
for page in page_batch:
for page_i, page in enumerate(page_batch):
assert page._backend is not None
if not page._backend.is_valid():
yield page
@ -212,7 +217,7 @@ class TesseractOcrCliModel(BaseOcrModel):
ocr_rects = self.get_ocr_rects(page)
all_ocr_cells = []
for ocr_rect in ocr_rects:
for ocr_rect_i, ocr_rect in enumerate(ocr_rects):
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
@ -225,8 +230,42 @@ class TesseractOcrCliModel(BaseOcrModel):
) as image_file:
fname = image_file.name
high_res_image.save(image_file)
df_result = self._run_tesseract(fname)
doc_orientation = 0
try:
df_osd = self._perform_osd(fname)
doc_orientation = _parse_orientation(df_osd)
except subprocess.CalledProcessError as exc:
_log.error(
"OSD failed (doc %s, page: %s, "
"OCR rectangle: %s, processed image file %s):\n %s",
conv_res.input.file,
page_i,
ocr_rect_i,
image_file,
exc.stderr,
)
# Skipping if OSD fail when in auto mode, otherwise proceed
# to OCR in the hope OCR will succeed while OSD failed
if self._is_auto:
continue
if doc_orientation != 0:
high_res_image = high_res_image.rotate(
-doc_orientation, expand=True
)
high_res_image.save(fname)
try:
df_result = self._run_tesseract(fname, df_osd)
except subprocess.CalledProcessError as exc:
_log.error(
"tesseract OCR failed (doc %s, page: %s, "
"OCR rectangle: %s, processed image file %s):\n %s",
conv_res.input.file,
page_i,
ocr_rect_i,
image_file,
exc.stderr,
)
continue
finally:
if os.path.exists(fname):
os.remove(fname)
@ -238,31 +277,30 @@ class TesseractOcrCliModel(BaseOcrModel):
text = row["text"]
conf = row["conf"]
l = float(row["left"]) # noqa: E741
b = float(row["top"])
w = float(row["width"])
h = float(row["height"])
t = b + h
r = l + w
left, top = float(row["left"]), float(row["top"])
right = left + float(row["width"])
bottom = top + row["height"]
bbox = BoundingBox(
l=left,
t=top,
r=right,
b=bottom,
coord_origin=CoordOrigin.TOPLEFT,
)
rect = tesseract_box_to_bounding_rectangle(
bbox,
original_offset=ocr_rect,
scale=self.scale,
orientation=doc_orientation,
im_size=high_res_image.size,
)
cell = TextCell(
index=ix,
text=str(text),
orig=str(text),
from_ocr=True,
confidence=conf / 100.0,
rect=BoundingRectangle.from_bounding_box(
BoundingBox.from_tuple(
coord=(
(l / self.scale) + ocr_rect.l,
(b / self.scale) + ocr_rect.t,
(r / self.scale) + ocr_rect.l,
(t / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
)
),
rect=rect,
)
all_ocr_cells.append(cell)
@ -278,3 +316,9 @@ class TesseractOcrCliModel(BaseOcrModel):
@classmethod
def get_options_type(cls) -> Type[OcrOptions]:
    """Return the OCR options class handled by this model (`TesseractCliOcrOptions`)."""
    return TesseractCliOcrOptions
def _parse_orientation(df_osd: pd.DataFrame) -> int:
    """Extract the page orientation from a Tesseract OSD result table.

    Args:
        df_osd: Data frame with ``key`` and ``value`` columns; the row whose
            key is ``"Orientation in degrees"`` carries the orientation.

    Returns:
        The value produced by ``parse_tesseract_orientation`` for the first
        matching row (its value is stripped of surrounding whitespace first).

    Raises:
        IndexError: If no row has the key ``"Orientation in degrees"``.
    """
    is_orientation_row = df_osd["key"] == "Orientation in degrees"
    raw_values = df_osd.loc[is_orientation_row, "value"].tolist()
    return parse_tesseract_orientation(raw_values[0].strip())

View File

@ -1,12 +1,11 @@
from __future__ import annotations
import logging
from collections.abc import Iterable
from pathlib import Path
from typing import Optional, Type
from typing import Iterable, Optional, Type
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle, TextCell
from docling_core.types.doc.page import TextCell
from docling.datamodel.base_models import Page
from docling.datamodel.document import ConversionResult
@ -17,7 +16,11 @@ from docling.datamodel.pipeline_options import (
)
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.ocr_utils import map_tesseract_script
from docling.utils.ocr_utils import (
map_tesseract_script,
parse_tesseract_orientation,
tesseract_box_to_bounding_rectangle,
)
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__)
@ -38,7 +41,7 @@ class TesseractOcrModel(BaseOcrModel):
accelerator_options=accelerator_options,
)
self.options: TesseractOcrOptions
self._is_auto: bool = "auto" in self.options.lang
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
self.reader = None
self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {}
@ -95,13 +98,13 @@ class TesseractOcrModel(BaseOcrModel):
if lang == "auto":
self.reader = tesserocr.PyTessBaseAPI(**tesserocr_kwargs)
self.osd_reader = tesserocr.PyTessBaseAPI(
**{"lang": "osd", "psm": tesserocr.PSM.OSD_ONLY} | tesserocr_kwargs
)
else:
self.reader = tesserocr.PyTessBaseAPI(
**{"lang": lang} | tesserocr_kwargs,
)
self.osd_reader = tesserocr.PyTessBaseAPI(
**{"lang": "osd", "psm": tesserocr.PSM.OSD_ONLY} | tesserocr_kwargs
)
self.reader_RIL = tesserocr.RIL
def __del__(self):
@ -118,19 +121,20 @@ class TesseractOcrModel(BaseOcrModel):
yield from page_batch
return
for page in page_batch:
for page_i, page in enumerate(page_batch):
assert page._backend is not None
if not page._backend.is_valid():
yield page
else:
with TimeRecorder(conv_res, "ocr"):
assert self.reader is not None
assert self.osd_reader is not None
assert self._tesserocr_languages is not None
ocr_rects = self.get_ocr_rects(page)
all_ocr_cells = []
for ocr_rect in ocr_rects:
for ocr_rect_i, ocr_rect in enumerate(ocr_rects):
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
@ -139,16 +143,27 @@ class TesseractOcrModel(BaseOcrModel):
)
local_reader = self.reader
if "auto" in self.options.lang:
assert self.osd_reader is not None
self.osd_reader.SetImage(high_res_image)
osd = self.osd_reader.DetectOrientationScript()
# No text, probably
if osd is None:
self.osd_reader.SetImage(high_res_image)
osd = self.osd_reader.DetectOrientationScript()
# No text, or Orientation and Script detection failure
if osd is None:
_log.error(
"OSD failed for doc (doc %s, page: %s, "
"OCR rectangle: %s)",
conv_res.input.file,
page_i,
ocr_rect_i,
)
# Skipping if OSD fail when in auto mode, otherwise proceed
# to OCR in the hope OCR will succeed while OSD failed
if self._is_auto:
continue
doc_orientation = parse_tesseract_orientation(osd["orient_deg"])
if doc_orientation != 0:
high_res_image = high_res_image.rotate(
-doc_orientation, expand=True
)
if self._is_auto:
script = osd["script_name"]
script = map_tesseract_script(script)
lang = f"{self.script_prefix}{script}"
@ -188,11 +203,23 @@ class TesseractOcrModel(BaseOcrModel):
# Extract text within the bounding box
text = local_reader.GetUTF8Text().strip()
confidence = local_reader.MeanTextConf()
left = box["x"] / self.scale
bottom = box["y"] / self.scale
right = (box["x"] + box["w"]) / self.scale
top = (box["y"] + box["h"]) / self.scale
left, top = box["x"], box["y"]
right = left + box["w"]
bottom = top + box["h"]
bbox = BoundingBox(
l=left,
t=top,
r=right,
b=bottom,
coord_origin=CoordOrigin.TOPLEFT,
)
rect = tesseract_box_to_bounding_rectangle(
bbox,
original_offset=ocr_rect,
scale=self.scale,
orientation=doc_orientation,
im_size=high_res_image.size,
)
cells.append(
TextCell(
index=ix,
@ -200,12 +227,7 @@ class TesseractOcrModel(BaseOcrModel):
orig=text,
from_ocr=True,
confidence=confidence,
rect=BoundingRectangle.from_bounding_box(
BoundingBox.from_tuple(
coord=(left, top, right, bottom),
origin=CoordOrigin.TOPLEFT,
),
),
rect=rect,
)
)

View File

@ -1,3 +1,11 @@
from typing import Optional, Tuple
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle
from docling.utils.orientation import CLIPPED_ORIENTATIONS, rotate_bounding_box
def map_tesseract_script(script: str) -> str:
r""" """
if script == "Katakana" or script == "Hiragana":
@ -7,3 +15,55 @@ def map_tesseract_script(script: str) -> str:
elif script == "Korean":
script = "Hangul"
return script
def parse_tesseract_orientation(orientation: str) -> int:
    """Convert a Tesseract orientation into a bounding-rectangle angle.

    Tesseract reports the orientation as one of 0, 90, 180 or 270 degrees
    clockwise, while bounding rectangle angles live in [0, 360[ and are
    counterclockwise; the value is therefore negated and wrapped modulo 360.

    Args:
        orientation: Orientation reported by Tesseract, convertible with
            ``int()``.

    Returns:
        The counterclockwise angle in degrees, in [0, 360[.

    Raises:
        ValueError: If the parsed value is not in ``CLIPPED_ORIENTATIONS``
            (or if ``int()`` cannot parse it).
    """
    degrees = int(orientation)
    if degrees not in CLIPPED_ORIENTATIONS:
        raise ValueError(
            f"invalid tesseract document orientation {orientation}, "
            f"expected orientation: {sorted(CLIPPED_ORIENTATIONS)}"
        )
    # Flip clockwise -> counterclockwise and fold back into [0, 360[
    return -degrees % 360
def tesseract_box_to_bounding_rectangle(
    bbox: BoundingBox,
    *,
    original_offset: Optional[BoundingBox] = None,
    scale: float,
    orientation: int,
    im_size: Tuple[int, int],
) -> BoundingRectangle:
    """Turn a box found by Tesseract into a rectangle in page coordinates.

    The box is first rotated by ``-orientation`` with ``rotate_bounding_box``,
    then every corner coordinate is divided by ``scale``, and finally, when
    ``original_offset`` is given, every corner is shifted by the offset's
    left (``l``) and top (``t``) values.

    Args:
        bbox: Box reported by Tesseract.
        original_offset: Optional box whose ``l``/``t`` are added to the
            x/y coordinates of the result; must use a TOPLEFT origin.
        scale: Divisor applied to all corner coordinates.
        orientation: Angle whose negation is passed to ``rotate_bounding_box``.
        im_size: Image size forwarded unchanged to ``rotate_bounding_box``.

    Returns:
        The rotated, rescaled and (optionally) shifted rectangle, with a
        TOPLEFT coordinate origin.

    Raises:
        ValueError: If ``original_offset`` does not use a TOPLEFT origin.
    """
    rotated = rotate_bounding_box(bbox, angle=-orientation, im_size=im_size)
    scaled = BoundingRectangle(
        r_x0=rotated.r_x0 / scale,
        r_y0=rotated.r_y0 / scale,
        r_x1=rotated.r_x1 / scale,
        r_y1=rotated.r_y1 / scale,
        r_x2=rotated.r_x2 / scale,
        r_y2=rotated.r_y2 / scale,
        r_x3=rotated.r_x3 / scale,
        r_y3=rotated.r_y3 / scale,
        coord_origin=CoordOrigin.TOPLEFT,
    )
    # Nothing to translate: the rectangle is already final
    if original_offset is None:
        return scaled
    if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
        msg = f"expected coordinate origin to be {CoordOrigin.TOPLEFT.value}"
        raise ValueError(msg)
    shift_x, shift_y = original_offset.l, original_offset.t
    scaled.r_x0 += shift_x
    scaled.r_x1 += shift_x
    scaled.r_x2 += shift_x
    scaled.r_x3 += shift_x
    scaled.r_y0 += shift_y
    scaled.r_y1 += shift_y
    scaled.r_y2 += shift_y
    scaled.r_y3 += shift_y
    return scaled

View File

@ -0,0 +1,71 @@
from typing import Tuple
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
def rotate_bounding_box(
    bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
) -> BoundingRectangle:
    """Map an axis-aligned box to the rectangle it occupies after a rotation.

    The box is converted to TOPLEFT coordinates, then its four corners are
    mapped, in the order bottom-left, bottom-right, top-right, top-left, to
    ``r_0`` .. ``r_3`` of the result. With ``(im_w, im_h) = im_size`` the
    mapping applied to a corner ``(x, y)`` is:

    * ``angle == 0``:   ``(x, y)``
    * ``angle == 90``:  ``(im_h - y, x)``
    * ``angle == 180``: ``(im_w - x, im_h - y)``
    * ``angle == 270``: ``(y, im_w - x)``

    Args:
        bbox: Box to rotate.
        angle: Rotation in degrees; reduced modulo 360 and must then be one
            of ``CLIPPED_ORIENTATIONS``.
        im_size: ``(width, height)`` of the image the box belongs to
            (index 1 is used as the page height for the origin conversion).

    Returns:
        The rotated rectangle, expressed with a TOPLEFT coordinate origin.

    Raises:
        ValueError: If ``angle`` modulo 360 is not 0, 90, 180 or 270.
    """
    # The box is left top width height in TOPLEFT coordinates
    # Bounding rectangle start with r_0 at the bottom left whatever the
    # coordinate system. Then other corners are found rotating counterclockwise
    bbox = bbox.to_top_left_origin(im_size[1])
    left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
    # im_size is (width, height), consistent with im_size[1] being used as
    # the page height just above
    im_w, im_h = im_size
    angle = angle % 360
    if angle == 0:
        r_x0 = left
        r_y0 = top + height
        r_x1 = r_x0 + width
        r_y1 = r_y0
        r_x2 = r_x0 + width
        r_y2 = r_y0 - height
        r_x3 = r_x0
        r_y3 = r_y0 - height
    elif angle == 90:
        r_x0 = im_h - (top + height)
        r_y0 = left
        r_x1 = r_x0
        r_y1 = r_y0 + width
        r_x2 = r_x0 + height
        r_y2 = r_y0 + width
        # Image of the top-left corner: must differ from r_1, otherwise the
        # rectangle collapses to three distinct points
        r_x3 = r_x0 + height
        r_y3 = r_y0
    elif angle == 180:
        r_x0 = im_w - left
        r_y0 = im_h - (top + height)
        r_x1 = r_x0 - width
        r_y1 = r_y0
        r_x2 = r_x0 - width
        r_y2 = r_y0 + height
        r_x3 = r_x0
        r_y3 = r_y0 + height
    elif angle == 270:
        r_x0 = top + height
        r_y0 = im_w - left
        r_x1 = r_x0
        r_y1 = r_y0 - width
        r_x2 = r_x0 - height
        r_y2 = r_y0 - width
        r_x3 = r_x0 - height
        r_y3 = r_y0
    else:
        msg = (
            f"invalid orientation {angle}, expected values in:"
            f" {sorted(CLIPPED_ORIENTATIONS)}"
        )
        raise ValueError(msg)
    return BoundingRectangle(
        r_x0=r_x0,
        r_y0=r_y0,
        r_x1=r_x1,
        r_y1=r_y1,
        r_x2=r_x2,
        r_y2=r_y2,
        r_x3=r_x3,
        r_y3=r_y3,
        coord_origin=CoordOrigin.TOPLEFT,
    )

View File

@ -4913,9 +4913,9 @@
{
"bbox": [
315.65362548828125,
489.19854736328125,
489.1985778808594,
537.1475219726562,
563.2765655517578
563.276611328125
],
"page": 1,
"span": [
@ -4979,9 +4979,9 @@
{
"bbox": [
312.10369873046875,
541.3901214599609,
541.3901519775391,
550.38916015625,
713.5591125488281
713.5591354370117
],
"page": 3,
"span": [
@ -5003,7 +5003,7 @@
74.30525970458984,
608.2984924316406,
519.9801025390625,
714.0887908935547
714.0887985229492
],
"page": 5,
"span": [
@ -5024,7 +5024,7 @@
"bbox": [
53.03328323364258,
284.3311462402344,
285.3731384277344,
285.3731689453125,
534.3346557617188
],
"page": 5,
@ -5047,7 +5047,7 @@
49.97503662109375,
604.4210662841797,
301.6335754394531,
688.2873153686523
688.2873382568359
],
"page": 8,
"span": [
@ -5066,7 +5066,7 @@
"prov": [
{
"bbox": [
305.58367919921875,
305.5836486816406,
611.3732452392578,
554.8258666992188,
693.3458404541016
@ -5111,9 +5111,9 @@
{
"bbox": [
216.76925659179688,
348.6529541015625,
348.65301513671875,
375.7829284667969,
411.5093994140625
411.5093688964844
],
"page": 8,
"span": [
@ -5132,10 +5132,10 @@
"prov": [
{
"bbox": [
383.1363830566406,
383.1364440917969,
349.2250671386719,
542.1131591796875,
410.7687072753906
542.1132202148438,
410.7686767578125
],
"page": 8,
"span": [
@ -5220,7 +5220,7 @@
"prov": [
{
"bbox": [
51.153778076171875,
51.15378952026367,
447.09332275390625,
282.8598937988281,
687.6914825439453
@ -5286,8 +5286,8 @@
"prov": [
{
"bbox": [
55.11635208129883,
542.6654968261719,
55.116363525390625,
542.6654510498047,
279.370849609375,
655.7449951171875
],
@ -5375,9 +5375,9 @@
{
"bbox": [
323.46868896484375,
327.73956298828125,
327.739501953125,
525.9569091796875,
429.5492248535156
429.5491638183594
],
"page": 15,
"span": [
@ -5421,7 +5421,7 @@
66.79948425292969,
293.8616027832031,
528.5565795898438,
538.3837127685547
538.3836822509766
],
"page": 16,
"span": [
@ -5443,9 +5443,9 @@
{
"bbox": [
315.65362548828125,
489.19854736328125,
489.1985778808594,
537.1475219726562,
563.2765655517578
563.276611328125
],
"page": 1,
"span": [
@ -6250,10 +6250,10 @@
"prov": [
{
"bbox": [
310.6757507324219,
310.67584228515625,
636.7794799804688,
542.9546508789062,
718.8061218261719
542.9547119140625,
718.8061141967773
],
"page": 4,
"span": [
@ -9329,7 +9329,7 @@
332.9688720703125,
148.73028564453125,
520.942138671875,
251.71649169921875
251.7164306640625
],
"page": 7,
"span": [
@ -10152,9 +10152,9 @@
{
"bbox": [
53.62853240966797,
499.6000061035156,
499.60003662109375,
298.5574951171875,
573.0514526367188
573.0514221191406
],
"page": 8,
"span": [
@ -12941,7 +12941,7 @@
"prov": [
{
"bbox": [
83.31759643554688,
83.31756591796875,
304.7430114746094,
248.873046875,
395.9864501953125
@ -12968,9 +12968,9 @@
{
"bbox": [
310.3294372558594,
655.8524932861328,
655.8524780273438,
555.8338623046875,
690.8223342895508
690.8223266601562
],
"page": 13,
"span": [
@ -12994,9 +12994,9 @@
{
"bbox": [
309.9566345214844,
607.2774353027344,
607.2774658203125,
555.7466430664062,
637.3854827880859
637.3855133056641
],
"page": 13,
"span": [
@ -13019,10 +13019,10 @@
"prov": [
{
"bbox": [
309.96356201171875,
558.4485168457031,
555.7053833007812,
596.2945098876953
309.9635314941406,
558.4485473632812,
555.7054443359375,
596.2945861816406
],
"page": 13,
"span": [
@ -13175,10 +13175,10 @@
"prov": [
{
"bbox": [
51.726383209228516,
447.7555236816406,
283.1140441894531,
518.3907470703125
51.72642135620117,
447.7554931640625,
283.114013671875,
518.3907165527344
],
"page": 14,
"span": [
@ -13201,7 +13201,7 @@
"prov": [
{
"bbox": [
51.43488693237305,
51.434879302978516,
300.17974853515625,
310.7267150878906,
338.51251220703125
@ -13253,7 +13253,7 @@
"prov": [
{
"bbox": [
51.2728271484375,
51.27280807495117,
200.086669921875,
311.0897216796875,
238.271484375
@ -13435,10 +13435,10 @@
"prov": [
{
"bbox": [
319.0649719238281,
319.06494140625,
122.80792236328125,
533.7738647460938,
182.1590576171875
533.77392578125,
182.1591796875
],
"page": 14,
"span": [
@ -13461,8 +13461,8 @@
"prov": [
{
"bbox": [
55.11635208129883,
542.6654968261719,
55.116363525390625,
542.6654510498047,
279.370849609375,
655.7449951171875
],
@ -13513,10 +13513,10 @@
"prov": [
{
"bbox": [
323.0059814453125,
569.0885772705078,
525.9517211914062,
670.4528656005859
323.0059509277344,
569.0885925292969,
525.95166015625,
670.4528503417969
],
"page": 15,
"span": [
@ -13540,9 +13540,9 @@
{
"bbox": [
323.384765625,
447.9078674316406,
447.90789794921875,
526.1268920898438,
550.0270690917969
550.0270538330078
],
"page": 15,
"span": [
@ -13566,9 +13566,9 @@
{
"bbox": [
323.46868896484375,
327.73956298828125,
327.739501953125,
525.9569091796875,
429.5492248535156
429.5491638183594
],
"page": 15,
"span": [

File diff suppressed because it is too large Load Diff

View File

@ -3099,9 +3099,9 @@
"prov": [
{
"bbox": [
323.4081115722656,
266.14935302734375,
553.295166015625,
323.408203125,
266.1492919921875,
553.2952270507812,
541.6512603759766
],
"page": 1,
@ -3122,9 +3122,9 @@
{
"bbox": [
88.33030700683594,
571.4317626953125,
571.4317321777344,
263.7049560546875,
699.1134490966797
699.1134796142578
],
"page": 3,
"span": [
@ -3144,9 +3144,9 @@
{
"bbox": [
53.05912780761719,
251.1358642578125,
251.135986328125,
295.8506164550781,
481.20867919921875
481.2087097167969
],
"page": 4,
"span": [
@ -3234,9 +3234,9 @@
{
"bbox": [
98.93103790283203,
497.91845703125,
497.91851806640625,
512.579833984375,
654.5244903564453
654.5245208740234
],
"page": 4,
"span": [
@ -8153,7 +8153,7 @@
62.02753829956055,
440.3381042480469,
285.78955078125,
596.3199462890625
596.3199310302734
],
"page": 6,
"span": [
@ -10514,9 +10514,9 @@
"prov": [
{
"bbox": [
80.35527038574219,
80.35525512695312,
496.5545349121094,
267.00823974609375,
267.0082092285156,
641.0637054443359
],
"page": 7,
@ -14214,10 +14214,10 @@
"prov": [
{
"bbox": [
72.65901947021484,
452.14599609375,
274.8346862792969,
619.5191650390625
72.6590347290039,
452.1459655761719,
274.83465576171875,
619.5191955566406
],
"page": 8,
"span": [

File diff suppressed because it is too large Load Diff

View File

@ -213,9 +213,9 @@
"prov": [
{
"bbox": [
139.6674041748047,
139.66746520996094,
322.5054626464844,
475.00927734375,
475.0093078613281,
454.4546203613281
],
"page": 1,

View File

@ -2646,7 +2646,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -2686,7 +2686,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,
@ -2816,7 +2816,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -2881,7 +2881,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -3096,7 +3096,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -3280,9 +3280,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -7852,7 +7852,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -8184,9 +8184,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -13582,7 +13582,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -13628,7 +13628,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,
@ -13770,7 +13770,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -13841,7 +13841,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -14062,7 +14062,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -14252,9 +14252,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -19713,7 +19713,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -20153,7 +20153,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -20224,7 +20224,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -20445,7 +20445,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -20635,9 +20635,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -26096,7 +26096,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -26440,7 +26440,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -26486,7 +26486,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,

View File

@ -2498,9 +2498,9 @@
{
"bbox": [
148.45364379882812,
366.1538391113281,
366.1537780761719,
464.3608093261719,
583.6257476806641
583.6257629394531
],
"page": 2,
"span": [
@ -2541,9 +2541,9 @@
"prov": [
{
"bbox": [
164.6503143310547,
164.65028381347656,
511.6590576171875,
449.550537109375,
449.5505676269531,
628.2029113769531
],
"page": 7,
@ -2563,7 +2563,7 @@
"prov": [
{
"bbox": [
140.70960998535156,
140.70968627929688,
198.32281494140625,
472.73382568359375,
283.9361572265625
@ -2585,10 +2585,10 @@
"prov": [
{
"bbox": [
162.67434692382812,
128.786376953125,
451.70068359375,
347.3774719238281
162.67430114746094,
128.78643798828125,
451.70062255859375,
347.37744140625
],
"page": 10,
"span": [
@ -2607,9 +2607,9 @@
"prov": [
{
"bbox": [
168.3928985595703,
168.39285278320312,
157.99432373046875,
447.3513488769531,
447.35137939453125,
610.0334930419922
],
"page": 11,
@ -4065,7 +4065,7 @@
143.6376495361328,
528.7375183105469,
470.8485412597656,
635.6522827148438
635.6522979736328
],
"page": 10,
"span": [

File diff suppressed because it is too large Load Diff

View File

@ -426,7 +426,7 @@
320.4467468261719,
81.689208984375,
558.8576049804688,
352.3592834472656
352.359375
],
"page": 1,
"span": [

View File

@ -3411,7 +3411,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -4081,7 +4081,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -4611,7 +4611,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -4651,7 +4651,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -4711,12 +4711,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -5463,7 +5463,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -5528,7 +5528,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -5593,7 +5593,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,
@ -6624,7 +6624,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -7312,7 +7312,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -7854,7 +7854,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -7900,7 +7900,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -7966,12 +7966,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -8738,7 +8738,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -8809,7 +8809,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -8880,7 +8880,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,
@ -9904,7 +9904,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -10592,7 +10592,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -11134,7 +11134,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -11180,7 +11180,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -11246,12 +11246,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -12018,7 +12018,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -12089,7 +12089,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -12162,7 +12162,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,

View File

@ -1541,7 +1541,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -1581,7 +1581,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -2036,7 +2036,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -2719,7 +2719,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -2765,7 +2765,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -3232,7 +3232,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -3914,7 +3914,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -3960,7 +3960,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -4427,7 +4427,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -5782,7 +5782,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -6302,7 +6302,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -7037,7 +7037,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,
@ -7092,7 +7092,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -7630,7 +7630,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -8389,7 +8389,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,
@ -8437,7 +8437,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -8975,7 +8975,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -9736,7 +9736,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,

View File

@ -1071,7 +1071,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -1111,7 +1111,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -1176,7 +1176,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -1576,7 +1576,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -1946,7 +1946,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -1986,7 +1986,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -2051,7 +2051,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -2346,7 +2346,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -2392,7 +2392,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -2463,7 +2463,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -2893,7 +2893,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -3281,7 +3281,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -3327,7 +3327,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -3398,7 +3398,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -3692,7 +3692,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -3738,7 +3738,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -3809,7 +3809,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -4239,7 +4239,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -4627,7 +4627,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -4673,7 +4673,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -4744,7 +4744,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -5748,7 +5748,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -5878,7 +5878,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -6173,7 +6173,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -6313,7 +6313,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -6598,7 +6598,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -6740,7 +6740,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -7053,7 +7053,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -7199,7 +7199,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -7489,7 +7489,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -7631,7 +7631,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -7944,7 +7944,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -8090,7 +8090,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -10010,7 +10010,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -10380,7 +10380,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -10470,7 +10470,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -10585,7 +10585,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -10815,7 +10815,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -10930,7 +10930,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -11070,7 +11070,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -11553,7 +11553,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -11941,7 +11941,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -12037,7 +12037,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -12158,7 +12158,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -12400,7 +12400,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -12521,7 +12521,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -12667,7 +12667,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -13149,7 +13149,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -13537,7 +13537,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -13633,7 +13633,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -13754,7 +13754,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -13996,7 +13996,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -14117,7 +14117,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -14263,7 +14263,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -15942,7 +15942,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -16222,7 +16222,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -16362,7 +16362,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -16772,7 +16772,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -17077,7 +17077,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -17257,7 +17257,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,
@ -17707,7 +17707,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -17999,7 +17999,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -18145,7 +18145,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -18579,7 +18579,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -18896,7 +18896,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -19088,7 +19088,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,
@ -19549,7 +19549,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -19841,7 +19841,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -19987,7 +19987,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -20421,7 +20421,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -20738,7 +20738,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -20930,7 +20930,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,

View File

@ -222,7 +222,7 @@
"prov": [
{
"bbox": [
134.9199981689453,
134.9200439453125,
281.78173828125,
475.6635437011719,
487.109375
@ -244,7 +244,7 @@
"prov": [
{
"bbox": [
218.81556701660156,
218.8155517578125,
283.10589599609375,
391.96246337890625,
513.9846496582031

View File

@ -1390,7 +1390,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -2174,7 +2174,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -2909,7 +2909,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -3623,7 +3623,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -3938,7 +3938,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -4302,7 +4302,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,
@ -4337,7 +4337,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -4658,7 +4658,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -5040,7 +5040,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,
@ -5072,7 +5072,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -5393,7 +5393,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -5729,7 +5729,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,

View File

@ -3989,7 +3989,7 @@
"prov": [
{
"bbox": [
33.09052658081055,
33.09040069580078,
89.5469970703125,
585.1502075195312,
498.9671630859375
@ -4055,9 +4055,9 @@
"prov": [
{
"bbox": [
64.16704559326172,
64.1669921875,
103.87176513671875,
258.77435302734375,
258.7742919921875,
188.49365234375
],
"page": 3,
@ -4099,7 +4099,7 @@
"prov": [
{
"bbox": [
145.41448974609375,
145.4144744873047,
156.616943359375,
252.08840942382812,
264.7552490234375
@ -4121,10 +4121,10 @@
"prov": [
{
"bbox": [
32.075260162353516,
554.0421142578125,
32.075252532958984,
554.0420684814453,
239.620361328125,
721.4226608276367
721.4226226806641
],
"page": 5,
"span": [
@ -4168,7 +4168,7 @@
135.97177124023438,
381.39068603515625,
545.4180908203125,
684.5892562866211
684.5892486572266
],
"page": 10,
"span": [
@ -4187,10 +4187,10 @@
"prov": [
{
"bbox": [
135.64834594726562,
135.64837646484375,
197.24334716796875,
301.23675537109375,
407.8263244628906
301.2367248535156,
407.8262939453125
],
"page": 11,
"span": [
@ -4209,10 +4209,10 @@
"prov": [
{
"bbox": [
63.80195617675781,
621.9679107666016,
547.1146850585938,
696.6176071166992
63.801902770996094,
621.9678497314453,
547.11474609375,
696.6175842285156
],
"page": 14,
"span": [
@ -4231,7 +4231,7 @@
"prov": [
{
"bbox": [
63.9850959777832,
63.985130310058594,
145.8603515625,
530.0478515625,
364.09503173828125
@ -4254,9 +4254,9 @@
{
"bbox": [
136.5016632080078,
314.45880126953125,
314.4587707519531,
545.4508666992188,
672.7509078979492
672.7508773803711
],
"page": 15,
"span": [
@ -4343,10 +4343,10 @@
"prov": [
{
"bbox": [
136.1495819091797,
76.3485107421875,
547.52685546875,
659.9669189453125
136.1496124267578,
76.34844970703125,
547.5267944335938,
659.9669647216797
],
"page": 2,
"span": [
@ -6705,9 +6705,9 @@
{
"bbox": [
135.52462768554688,
349.94940185546875,
349.949462890625,
545.8714599609375,
502.2746887207031
502.2747802734375
],
"page": 8,
"span": [
@ -7164,7 +7164,7 @@
64.41139221191406,
70.39208984375,
547.3950805664062,
398.3863525390625
398.3863830566406
],
"page": 9,
"span": [
@ -9130,7 +9130,7 @@
"prov": [
{
"bbox": [
63.55635070800781,
63.55636978149414,
495.77532958984375,
548.5687255859375,
687.7661285400391

File diff suppressed because it is too large Load Diff

View File

@ -1171,7 +1171,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -1311,7 +1311,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,
@ -1971,7 +1971,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -2117,7 +2117,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,
@ -2770,7 +2770,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -2916,7 +2916,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,

View File

@ -5951,7 +5951,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -7406,7 +7406,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -8046,7 +8046,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
}
@ -10042,7 +10042,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -11509,7 +11509,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -12155,7 +12155,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
},
@ -14148,7 +14148,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -15615,7 +15615,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -16261,7 +16261,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
},

View File

@ -744,7 +744,7 @@
388.5767822265625,
739.034423828125,
482.4759216308594,
806.0040969848633
806.0041046142578
],
"page": 1,
"span": [

View File

@ -1391,7 +1391,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -1477,7 +1477,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -1517,7 +1517,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,
@ -1597,7 +1597,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -2361,7 +2361,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -2891,7 +2891,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -2931,7 +2931,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -2971,7 +2971,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -3011,7 +3011,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -3051,7 +3051,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -3131,7 +3131,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -3236,7 +3236,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -3318,7 +3318,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -3822,7 +3822,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -3862,7 +3862,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -3902,7 +3902,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -3942,7 +3942,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -3982,7 +3982,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -4062,7 +4062,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -4102,7 +4102,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -4142,7 +4142,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -4182,7 +4182,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -4247,7 +4247,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -4287,7 +4287,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -4352,7 +4352,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -4432,7 +4432,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -4484,7 +4484,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -4580,7 +4580,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -4626,7 +4626,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,
@ -4718,7 +4718,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -5482,7 +5482,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -6012,7 +6012,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -6052,7 +6052,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -6092,7 +6092,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -6132,7 +6132,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -6172,7 +6172,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -6252,7 +6252,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -6357,7 +6357,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -6451,7 +6451,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -6955,7 +6955,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -6995,7 +6995,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -7035,7 +7035,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -7075,7 +7075,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -7115,7 +7115,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -7195,7 +7195,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -7235,7 +7235,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -7275,7 +7275,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -7315,7 +7315,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -7380,7 +7380,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -7420,7 +7420,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -7485,7 +7485,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -7565,7 +7565,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -7610,7 +7610,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -7706,7 +7706,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -7798,7 +7798,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -8562,7 +8562,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -9092,7 +9092,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -9132,7 +9132,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -9172,7 +9172,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -9212,7 +9212,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -9252,7 +9252,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -9332,7 +9332,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -9437,7 +9437,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -9531,7 +9531,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -10035,7 +10035,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -10075,7 +10075,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -10115,7 +10115,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -10155,7 +10155,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -10195,7 +10195,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -10275,7 +10275,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -10315,7 +10315,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -10355,7 +10355,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -10395,7 +10395,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -10460,7 +10460,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -10500,7 +10500,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -10565,7 +10565,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -10645,7 +10645,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -10695,7 +10695,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,

View File

@ -16094,9 +16094,9 @@
"page_no": 1,
"bbox": {
"l": 315.65362548828125,
"t": 563.2765655517578,
"t": 563.276611328125,
"r": 537.1475219726562,
"b": 489.19854736328125,
"b": 489.1985778808594,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -16443,9 +16443,9 @@
"page_no": 3,
"bbox": {
"l": 312.10369873046875,
"t": 713.5591125488281,
"t": 713.5591354370117,
"r": 550.38916015625,
"b": 541.3901214599609,
"b": 541.3901519775391,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -16658,7 +16658,7 @@
"page_no": 5,
"bbox": {
"l": 74.30525970458984,
"t": 714.0887908935547,
"t": 714.0887985229492,
"r": 519.9801025390625,
"b": 608.2984924316406,
"coord_origin": "BOTTOMLEFT"
@ -16829,7 +16829,7 @@
"bbox": {
"l": 53.03328323364258,
"t": 534.3346557617188,
"r": 285.3731384277344,
"r": 285.3731689453125,
"b": 284.3311462402344,
"coord_origin": "BOTTOMLEFT"
},
@ -16862,7 +16862,7 @@
"page_no": 8,
"bbox": {
"l": 49.97503662109375,
"t": 688.2873153686523,
"t": 688.2873382568359,
"r": 301.6335754394531,
"b": 604.4210662841797,
"coord_origin": "BOTTOMLEFT"
@ -16895,7 +16895,7 @@
{
"page_no": 8,
"bbox": {
"l": 305.58367919921875,
"l": 305.5836486816406,
"t": 693.3458404541016,
"r": 554.8258666992188,
"b": 611.3732452392578,
@ -16978,9 +16978,9 @@
"page_no": 8,
"bbox": {
"l": 216.76925659179688,
"t": 411.5093994140625,
"t": 411.5093688964844,
"r": 375.7829284667969,
"b": 348.6529541015625,
"b": 348.65301513671875,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -17177,9 +17177,9 @@
{
"page_no": 8,
"bbox": {
"l": 383.1363830566406,
"t": 410.7687072753906,
"r": 542.1131591796875,
"l": 383.1364440917969,
"t": 410.7686767578125,
"r": 542.1132202148438,
"b": 349.2250671386719,
"coord_origin": "BOTTOMLEFT"
},
@ -17482,7 +17482,7 @@
{
"page_no": 14,
"bbox": {
"l": 51.153778076171875,
"l": 51.15378952026367,
"t": 687.6914825439453,
"r": 282.8598937988281,
"b": 447.09332275390625,
@ -17580,10 +17580,10 @@
{
"page_no": 15,
"bbox": {
"l": 55.11635208129883,
"l": 55.116363525390625,
"t": 655.7449951171875,
"r": 279.370849609375,
"b": 542.6654968261719,
"b": 542.6654510498047,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -17701,9 +17701,9 @@
"page_no": 15,
"bbox": {
"l": 323.46868896484375,
"t": 429.5492248535156,
"t": 429.5491638183594,
"r": 525.9569091796875,
"b": 327.73956298828125,
"b": 327.739501953125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -17765,7 +17765,7 @@
"page_no": 16,
"bbox": {
"l": 66.79948425292969,
"t": 538.3837127685547,
"t": 538.3836822509766,
"r": 528.5565795898438,
"b": 293.8616027832031,
"coord_origin": "BOTTOMLEFT"
@ -17801,9 +17801,9 @@
"page_no": 1,
"bbox": {
"l": 315.65362548828125,
"t": 563.2765655517578,
"t": 563.276611328125,
"r": 537.1475219726562,
"b": 489.19854736328125,
"b": 489.1985778808594,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -18771,9 +18771,9 @@
{
"page_no": 4,
"bbox": {
"l": 310.6757507324219,
"t": 718.8061218261719,
"r": 542.9546508789062,
"l": 310.67584228515625,
"t": 718.8061141967773,
"r": 542.9547119140625,
"b": 636.7794799804688,
"coord_origin": "BOTTOMLEFT"
},
@ -22946,7 +22946,7 @@
"page_no": 7,
"bbox": {
"l": 332.9688720703125,
"t": 251.71649169921875,
"t": 251.7164306640625,
"r": 520.942138671875,
"b": 148.73028564453125,
"coord_origin": "BOTTOMLEFT"
@ -24069,9 +24069,9 @@
"page_no": 8,
"bbox": {
"l": 53.62853240966797,
"t": 573.0514526367188,
"t": 573.0514221191406,
"r": 298.5574951171875,
"b": 499.6000061035156,
"b": 499.60003662109375,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -27719,7 +27719,7 @@
{
"page_no": 13,
"bbox": {
"l": 83.31759643554688,
"l": 83.31756591796875,
"t": 395.9864501953125,
"r": 248.873046875,
"b": 304.7430114746094,
@ -27759,9 +27759,9 @@
"page_no": 13,
"bbox": {
"l": 310.3294372558594,
"t": 690.8223342895508,
"t": 690.8223266601562,
"r": 555.8338623046875,
"b": 655.8524932861328,
"b": 655.8524780273438,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -27798,9 +27798,9 @@
"page_no": 13,
"bbox": {
"l": 309.9566345214844,
"t": 637.3854827880859,
"t": 637.3855133056641,
"r": 555.7466430664062,
"b": 607.2774353027344,
"b": 607.2774658203125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -27832,10 +27832,10 @@
{
"page_no": 13,
"bbox": {
"l": 309.96356201171875,
"t": 596.2945098876953,
"r": 555.7053833007812,
"b": 558.4485168457031,
"l": 309.9635314941406,
"t": 596.2945861816406,
"r": 555.7054443359375,
"b": 558.4485473632812,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -28050,10 +28050,10 @@
{
"page_no": 14,
"bbox": {
"l": 51.726383209228516,
"t": 518.3907470703125,
"r": 283.1140441894531,
"b": 447.7555236816406,
"l": 51.72642135620117,
"t": 518.3907165527344,
"r": 283.114013671875,
"b": 447.7554931640625,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -28085,7 +28085,7 @@
{
"page_no": 14,
"bbox": {
"l": 51.43488693237305,
"l": 51.434879302978516,
"t": 338.51251220703125,
"r": 310.7267150878906,
"b": 300.17974853515625,
@ -28155,7 +28155,7 @@
{
"page_no": 14,
"bbox": {
"l": 51.2728271484375,
"l": 51.27280807495117,
"t": 238.271484375,
"r": 311.0897216796875,
"b": 200.086669921875,
@ -28408,9 +28408,9 @@
{
"page_no": 14,
"bbox": {
"l": 319.0649719238281,
"t": 182.1590576171875,
"r": 533.7738647460938,
"l": 319.06494140625,
"t": 182.1591796875,
"r": 533.77392578125,
"b": 122.80792236328125,
"coord_origin": "BOTTOMLEFT"
},
@ -28443,10 +28443,10 @@
{
"page_no": 15,
"bbox": {
"l": 55.11635208129883,
"l": 55.116363525390625,
"t": 655.7449951171875,
"r": 279.370849609375,
"b": 542.6654968261719,
"b": 542.6654510498047,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -28517,10 +28517,10 @@
{
"page_no": 15,
"bbox": {
"l": 323.0059814453125,
"t": 670.4528656005859,
"r": 525.9517211914062,
"b": 569.0885772705078,
"l": 323.0059509277344,
"t": 670.4528503417969,
"r": 525.95166015625,
"b": 569.0885925292969,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -28557,9 +28557,9 @@
"page_no": 15,
"bbox": {
"l": 323.384765625,
"t": 550.0270690917969,
"t": 550.0270538330078,
"r": 526.1268920898438,
"b": 447.9078674316406,
"b": 447.90789794921875,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -28592,9 +28592,9 @@
"page_no": 15,
"bbox": {
"l": 323.46868896484375,
"t": 429.5492248535156,
"t": 429.5491638183594,
"r": 525.9569091796875,
"b": 327.73956298828125,
"b": 327.739501953125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [

File diff suppressed because it is too large Load Diff

View File

@ -16866,10 +16866,10 @@
{
"page_no": 1,
"bbox": {
"l": 323.4081115722656,
"l": 323.408203125,
"t": 541.6512603759766,
"r": 553.295166015625,
"b": 266.14935302734375,
"r": 553.2952270507812,
"b": 266.1492919921875,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -16941,9 +16941,9 @@
"page_no": 3,
"bbox": {
"l": 88.33030700683594,
"t": 699.1134490966797,
"t": 699.1134796142578,
"r": 263.7049560546875,
"b": 571.4317626953125,
"b": 571.4317321777344,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -16979,9 +16979,9 @@
"page_no": 4,
"bbox": {
"l": 53.05912780761719,
"t": 481.20867919921875,
"t": 481.2087097167969,
"r": 295.8506164550781,
"b": 251.1358642578125,
"b": 251.135986328125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -17255,9 +17255,9 @@
"page_no": 4,
"bbox": {
"l": 98.93103790283203,
"t": 654.5244903564453,
"t": 654.5245208740234,
"r": 512.579833984375,
"b": 497.91845703125,
"b": 497.91851806640625,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -23506,7 +23506,7 @@
"page_no": 6,
"bbox": {
"l": 62.02753829956055,
"t": 596.3199462890625,
"t": 596.3199310302734,
"r": 285.78955078125,
"b": 440.3381042480469,
"coord_origin": "BOTTOMLEFT"
@ -26668,9 +26668,9 @@
{
"page_no": 7,
"bbox": {
"l": 80.35527038574219,
"l": 80.35525512695312,
"t": 641.0637054443359,
"r": 267.00823974609375,
"r": 267.0082092285156,
"b": 496.5545349121094,
"coord_origin": "BOTTOMLEFT"
},
@ -31588,10 +31588,10 @@
{
"page_no": 8,
"bbox": {
"l": 72.65901947021484,
"t": 619.5191650390625,
"r": 274.8346862792969,
"b": 452.14599609375,
"l": 72.6590347290039,
"t": 619.5191955566406,
"r": 274.83465576171875,
"b": 452.1459655761719,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [

File diff suppressed because it is too large Load Diff

View File

@ -336,9 +336,9 @@
{
"page_no": 1,
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 454.4546203613281,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 322.5054626464844,
"coord_origin": "BOTTOMLEFT"
},

View File

@ -2646,7 +2646,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -2686,7 +2686,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,
@ -2816,7 +2816,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -2881,7 +2881,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -3096,7 +3096,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -3280,9 +3280,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -7852,7 +7852,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -8184,9 +8184,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -13582,7 +13582,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -13628,7 +13628,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,
@ -13770,7 +13770,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -13841,7 +13841,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -14062,7 +14062,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -14252,9 +14252,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -19713,7 +19713,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -20153,7 +20153,7 @@
"b": 179.20818999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.957740306854248,
"confidence": 0.9577404260635376,
"cells": [
{
"index": 5,
@ -20224,7 +20224,7 @@
"b": 255.42400999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9850425124168396,
"confidence": 0.98504239320755,
"cells": [
{
"index": 7,
@ -20445,7 +20445,7 @@
"b": 327.98218,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9591907262802124,
"confidence": 0.9591910243034363,
"cells": [
{
"index": 15,
@ -20635,9 +20635,9 @@
"id": 0,
"label": "table",
"bbox": {
"l": 139.6674041748047,
"l": 139.66746520996094,
"t": 337.5453796386719,
"r": 475.00927734375,
"r": 475.0093078613281,
"b": 469.4945373535156,
"coord_origin": "TOPLEFT"
},
@ -26096,7 +26096,7 @@
"b": 618.3,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9849976301193237,
"confidence": 0.9849975109100342,
"cells": [
{
"index": 93,
@ -26440,7 +26440,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9373533129692078,
"confidence": 0.9373531937599182,
"cells": [
{
"index": 0,
@ -26486,7 +26486,7 @@
"b": 102.78223000000003,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8858681321144104,
"confidence": 0.8858677744865417,
"cells": [
{
"index": 1,

View File

@ -14942,9 +14942,9 @@
"page_no": 2,
"bbox": {
"l": 148.45364379882812,
"t": 583.6257476806641,
"t": 583.6257629394531,
"r": 464.3608093261719,
"b": 366.1538391113281,
"b": 366.1537780761719,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -15221,9 +15221,9 @@
{
"page_no": 7,
"bbox": {
"l": 164.6503143310547,
"l": 164.65028381347656,
"t": 628.2029113769531,
"r": 449.550537109375,
"r": 449.5505676269531,
"b": 511.6590576171875,
"coord_origin": "BOTTOMLEFT"
},
@ -15475,7 +15475,7 @@
{
"page_no": 8,
"bbox": {
"l": 140.70960998535156,
"l": 140.70968627929688,
"t": 283.9361572265625,
"r": 472.73382568359375,
"b": 198.32281494140625,
@ -15804,10 +15804,10 @@
{
"page_no": 10,
"bbox": {
"l": 162.67434692382812,
"t": 347.3774719238281,
"r": 451.70068359375,
"b": 128.786376953125,
"l": 162.67430114746094,
"t": 347.37744140625,
"r": 451.70062255859375,
"b": 128.78643798828125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -15875,9 +15875,9 @@
{
"page_no": 11,
"bbox": {
"l": 168.3928985595703,
"l": 168.39285278320312,
"t": 610.0334930419922,
"r": 447.3513488769531,
"r": 447.35137939453125,
"b": 157.99432373046875,
"coord_origin": "BOTTOMLEFT"
},
@ -17702,7 +17702,7 @@
"page_no": 10,
"bbox": {
"l": 143.6376495361328,
"t": 635.6522827148438,
"t": 635.6522979736328,
"r": 470.8485412597656,
"b": 528.7375183105469,
"coord_origin": "BOTTOMLEFT"

File diff suppressed because it is too large Load Diff

View File

@ -951,7 +951,7 @@
"page_no": 1,
"bbox": {
"l": 320.4467468261719,
"t": 352.3592834472656,
"t": 352.359375,
"r": 558.8576049804688,
"b": 81.689208984375,
"coord_origin": "BOTTOMLEFT"

View File

@ -3411,7 +3411,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -4081,7 +4081,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -4611,7 +4611,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -4651,7 +4651,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -4711,12 +4711,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -5463,7 +5463,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -5528,7 +5528,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -5593,7 +5593,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,
@ -6624,7 +6624,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -7312,7 +7312,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -7854,7 +7854,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -7900,7 +7900,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -7966,12 +7966,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -8738,7 +8738,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -8809,7 +8809,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -8880,7 +8880,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,
@ -9904,7 +9904,7 @@
"b": 519.65363,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9695364832878113,
"confidence": 0.9695363640785217,
"cells": [
{
"index": 34,
@ -10592,7 +10592,7 @@
"b": 142.65363000000002,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9263732433319092,
"confidence": 0.9263731241226196,
"cells": [
{
"index": 59,
@ -11134,7 +11134,7 @@
"b": 382.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9253151416778564,
"confidence": 0.9253152012825012,
"cells": [
{
"index": 79,
@ -11180,7 +11180,7 @@
"b": 409.15362999999996,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9676452875137329,
"confidence": 0.9676451683044434,
"cells": [
{
"index": 80,
@ -11246,12 +11246,12 @@
"label": "picture",
"bbox": {
"l": 320.4467468261719,
"t": 421.6407165527344,
"t": 421.640625,
"r": 558.8576049804688,
"b": 692.310791015625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9881085753440857,
"confidence": 0.9881086945533752,
"cells": [
{
"index": 82,
@ -12018,7 +12018,7 @@
"b": 713.009598,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9449449777603149,
"confidence": 0.9449448585510254,
"cells": [
{
"index": 93,
@ -12089,7 +12089,7 @@
"b": 710.989597,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9497623443603516,
"confidence": 0.9497622847557068,
"cells": [
{
"index": 95,
@ -12162,7 +12162,7 @@
"b": 740.290298,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9368569850921631,
"confidence": 0.9368568658828735,
"cells": [
{
"index": 97,

View File

@ -1541,7 +1541,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -1581,7 +1581,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -2036,7 +2036,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -2719,7 +2719,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -2765,7 +2765,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -3232,7 +3232,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -3914,7 +3914,7 @@
"b": 358.76782,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5588339567184448,
"confidence": 0.5588350296020508,
"cells": [
{
"index": 18,
@ -3960,7 +3960,7 @@
"b": 406.74554,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6312211155891418,
"confidence": 0.6312209963798523,
"cells": [
{
"index": 19,
@ -4427,7 +4427,7 @@
"b": 607.23564,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9843752980232239,
"confidence": 0.9843751788139343,
"cells": [
{
"index": 36,
@ -5782,7 +5782,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -6302,7 +6302,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -7037,7 +7037,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,
@ -7092,7 +7092,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -7630,7 +7630,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -8389,7 +8389,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,
@ -8437,7 +8437,7 @@
"b": 137.5481507594625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9505067467689514,
"confidence": 0.950506865978241,
"cells": [
{
"index": 0,
@ -8975,7 +8975,7 @@
"b": 373.7119120634245,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8727476000785828,
"confidence": 0.8727474808692932,
"cells": [
{
"index": 19,
@ -9736,7 +9736,7 @@
"b": 704.5687238902275,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8504503965377808,
"confidence": 0.8504500389099121,
"cells": [
{
"index": 46,

View File

@ -245,7 +245,13 @@
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/6",
@ -269,7 +275,13 @@
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
"text": "This is another equation:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/8",
@ -305,7 +317,13 @@
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/11",
@ -413,7 +431,13 @@
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/20",
@ -437,7 +461,13 @@
"label": "paragraph",
"prov": [],
"orig": "This is another equation:",
"text": "This is another equation:"
"text": "This is another equation:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/22",
@ -485,7 +515,13 @@
"label": "paragraph",
"prov": [],
"orig": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text."
"text": "This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text. This is text.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/26",
@ -593,7 +629,13 @@
"label": "paragraph",
"prov": [],
"orig": "And that is an equation by itself. Cheers!",
"text": "And that is an equation by itself. Cheers!"
"text": "And that is an equation by itself. Cheers!",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/35",

View File

@ -61,7 +61,13 @@
"label": "paragraph",
"prov": [],
"orig": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi."
"text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Proin elit mi, fermentum vitae dolor facilisis, porttitor mollis quam. Cras quam massa, venenatis faucibus libero vel, euismod sollicitudin ipsum. Aliquam semper sapien leo, ac ultrices nibh mollis congue. Cras luctus ultrices est, ut scelerisque eros euismod ut. Curabitur ac tincidunt felis, non scelerisque lectus. Praesent sollicitudin vulputate est id consequat. Vestibulum pharetra ligula sit amet varius porttitor. Sed eros diam, gravida non varius at, scelerisque in libero. Ut auctor finibus mauris sit amet ornare. Sed facilisis leo at urna rhoncus, in facilisis arcu eleifend. Sed tincidunt lacinia fermentum. Cras non purus fringilla, semper quam non, sodales sem. Nulla facilisi.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/1",
@ -85,7 +91,13 @@
"label": "paragraph",
"prov": [],
"orig": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
"text": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet."
"text": "Duis condimentum dui eget ullamcorper maximus. Nulla tortor lectus, hendrerit at diam fermentum, euismod ornare orci. Integer ac mauris sed augue ultricies pellentesque. Etiam condimentum turpis a risus dictum, sed tempor arcu vestibulum. Quisque at venenatis tellus. Morbi id lobortis elit. In gravida metus at ornare suscipit. Donec euismod nibh sit amet commodo porttitor. Integer commodo sit amet nisi vel accumsan. Donec lacinia posuere porta. Pellentesque vulputate porta risus, vel consectetur nisl gravida sit amet. Nam scelerisque enim sodales lacus tempor, et tristique ante aliquet.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/3",
@ -109,7 +121,13 @@
"label": "paragraph",
"prov": [],
"orig": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
"text": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus."
"text": "Maecenas id neque pharetra, eleifend lectus a, vehicula sapien. Aliquam erat volutpat. Ut arcu erat, blandit id elementum at, aliquet pretium mauris. Nulla at semper orci. Nunc sed maximus metus. Duis eget tristique arcu. Phasellus fringilla augue est, ut bibendum est bibendum vitae. Nam et urna interdum, egestas velit a, consectetur metus. Pellentesque facilisis vehicula orci, eu posuere justo imperdiet non. Vestibulum tincidunt orci ac lorem consequat semper. Fusce semper sollicitudin orci, id lacinia nulla faucibus eu. Donec ut nisl metus.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/5",
@ -133,7 +151,13 @@
"label": "paragraph",
"prov": [],
"orig": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
"text": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl."
"text": "Duis ac tellus sed turpis feugiat aliquam sed vel justo. Fusce sit amet volutpat massa. Duis tristique finibus metus quis tincidunt. Etiam dapibus fringilla diam at pharetra. Vivamus dolor est, hendrerit ac ligula nec, pharetra lacinia sapien. Phasellus at malesuada orci. Maecenas est justo, mollis non ultrices ut, sagittis commodo odio. Integer viverra mauris pellentesque bibendum vestibulum. Sed eu felis mattis, efficitur justo non, finibus lorem. Phasellus viverra diam et sapien imperdiet interdum. Cras a convallis libero. Integer maximus dui vel lorem hendrerit, sit amet convallis ligula lobortis. Duis eu lacus elementum, scelerisque nunc eget, dignissim libero. Suspendisse mi quam, vehicula sit amet pellentesque rhoncus, blandit eu nisl.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/7",
@ -157,7 +181,13 @@
"label": "paragraph",
"prov": [],
"orig": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
"text": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh."
"text": "Nunc vehicula mattis erat ac consectetur. Etiam pharetra mauris ut tempor pellentesque. Sed vel libero vitae ante tempus sagittis vel sit amet dolor. Etiam faucibus viverra sodales. Pellentesque ullamcorper magna libero, non malesuada dui bibendum quis. Donec sed dolor non sem luctus volutpat. Morbi vel diam ut urna euismod gravida a id lectus. Vestibulum vel mauris eu tellus hendrerit dapibus. Etiam scelerisque lacus vel ante ultricies vulputate. In ullamcorper malesuada justo, vel scelerisque nisl lacinia at. Donec sodales interdum ipsum, ac bibendum ipsum pharetra interdum. Vivamus condimentum ac ante vel aliquam. Ut consectetur eu nibh nec gravida. Vestibulum accumsan, purus at mollis rutrum, sapien tortor accumsan purus, vitae fermentum urna mauris ut lacus. Fusce vitae leo sollicitudin, vehicula turpis eu, tempus nibh.",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
}
],
"pictures": [],

View File

@ -1071,7 +1071,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -1111,7 +1111,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -1176,7 +1176,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -1576,7 +1576,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -1946,7 +1946,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -1986,7 +1986,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -2051,7 +2051,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -2346,7 +2346,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -2392,7 +2392,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -2463,7 +2463,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -2893,7 +2893,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -3281,7 +3281,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -3327,7 +3327,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -3398,7 +3398,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -3692,7 +3692,7 @@
"b": 85.87195029682243,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9216852784156799,
"confidence": 0.9216853976249695,
"cells": [
{
"index": 0,
@ -3738,7 +3738,7 @@
"b": 127.39196044033929,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9795150756835938,
"confidence": 0.9795149564743042,
"cells": [
{
"index": 1,
@ -3809,7 +3809,7 @@
"b": 156.98303054262306,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9472767114639282,
"confidence": 0.9472769498825073,
"cells": [
{
"index": 3,
@ -4239,7 +4239,7 @@
"b": 477.07196164903314,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9806972742080688,
"confidence": 0.9806973934173584,
"cells": [
{
"index": 16,
@ -4627,7 +4627,7 @@
"b": 617.5429721345812,
"coord_origin": "TOPLEFT"
},
"confidence": 0.950114905834198,
"confidence": 0.9501149654388428,
"cells": [
{
"index": 29,
@ -4673,7 +4673,7 @@
"b": 659.2319622786822,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9778240919113159,
"confidence": 0.9778239727020264,
"cells": [
{
"index": 30,
@ -4744,7 +4744,7 @@
"b": 714.4319424694847,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9782076478004456,
"confidence": 0.978207528591156,
"cells": [
{
"index": 32,
@ -5748,7 +5748,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -5878,7 +5878,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -6173,7 +6173,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -6313,7 +6313,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -6598,7 +6598,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -6740,7 +6740,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -7053,7 +7053,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -7199,7 +7199,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -7489,7 +7489,7 @@
"b": 113.47198039222405,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9813448190689087,
"confidence": 0.9813449382781982,
"cells": [
{
"index": 0,
@ -7631,7 +7631,7 @@
"b": 212.35199073400975,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798638820648193,
"confidence": 0.9798637628555298,
"cells": [
{
"index": 4,
@ -7944,7 +7944,7 @@
"b": 322.99194111644454,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9762884378433228,
"confidence": 0.9762883186340332,
"cells": [
{
"index": 14,
@ -8090,7 +8090,7 @@
"b": 380.18298131412945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9581918120384216,
"confidence": 0.9581919312477112,
"cells": [
{
"index": 19,
@ -10010,7 +10010,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -10380,7 +10380,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -10470,7 +10470,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -10585,7 +10585,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -10815,7 +10815,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -10930,7 +10930,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -11070,7 +11070,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -11553,7 +11553,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -11941,7 +11941,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -12037,7 +12037,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -12158,7 +12158,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -12400,7 +12400,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -12521,7 +12521,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -12667,7 +12667,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -13149,7 +13149,7 @@
"b": 280.9919409712686,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9800205230712891,
"confidence": 0.9800204038619995,
"cells": [
{
"index": 11,
@ -13537,7 +13537,7 @@
"b": 448.9919715519727,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9789240956306458,
"confidence": 0.9789239764213562,
"cells": [
{
"index": 24,
@ -13633,7 +13633,7 @@
"b": 490.51196169548945,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9704653024673462,
"confidence": 0.9704654216766357,
"cells": [
{
"index": 27,
@ -13754,7 +13754,7 @@
"b": 518.1119717908908,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9631043672561646,
"confidence": 0.963104248046875,
"cells": [
{
"index": 31,
@ -13996,7 +13996,7 @@
"b": 573.3119819816936,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9727876782417297,
"confidence": 0.9727875590324402,
"cells": [
{
"index": 39,
@ -14117,7 +14117,7 @@
"b": 614.8319721252104,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798402190208435,
"confidence": 0.9798403382301331,
"cells": [
{
"index": 43,
@ -14263,7 +14263,7 @@
"b": 672.2629723237247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9490435123443604,
"confidence": 0.9490436315536499,
"cells": [
{
"index": 48,
@ -15942,7 +15942,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -16222,7 +16222,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -16362,7 +16362,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -16772,7 +16772,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -17077,7 +17077,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -17257,7 +17257,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,
@ -17707,7 +17707,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -17999,7 +17999,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -18145,7 +18145,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -18579,7 +18579,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -18896,7 +18896,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -19088,7 +19088,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,
@ -19549,7 +19549,7 @@
"b": 113.23199039139433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9798315167427063,
"confidence": 0.9798316359519958,
"cells": [
{
"index": 0,
@ -19841,7 +19841,7 @@
"b": 196.27197067842803,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9792094230651855,
"confidence": 0.9792095422744751,
"cells": [
{
"index": 10,
@ -19987,7 +19987,7 @@
"b": 253.463010876113,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9634494781494141,
"confidence": 0.9634493589401245,
"cells": [
{
"index": 15,
@ -20421,7 +20421,7 @@
"b": 460.751981592622,
"coord_origin": "TOPLEFT"
},
"confidence": 0.979421854019165,
"confidence": 0.9794219732284546,
"cells": [
{
"index": 29,
@ -20738,7 +20738,7 @@
"b": 543.7919618796556,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9810317158699036,
"confidence": 0.9810318350791931,
"cells": [
{
"index": 40,
@ -20930,7 +20930,7 @@
"b": 642.6719622214413,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9675389528274536,
"confidence": 0.9675387144088745,
"cells": [
{
"index": 46,

View File

@ -334,7 +334,7 @@
{
"page_no": 1,
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 487.109375,
"r": 475.6635437011719,
"b": 281.78173828125,
@ -372,7 +372,7 @@
{
"page_no": 2,
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 513.9846496582031,
"r": 391.96246337890625,
"b": 283.10589599609375,

View File

@ -1390,7 +1390,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -2174,7 +2174,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -2909,7 +2909,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 134.9199981689453,
"l": 134.9200439453125,
"t": 304.890625,
"r": 475.6635437011719,
"b": 510.21826171875,
@ -3623,7 +3623,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -3938,7 +3938,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -4302,7 +4302,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,
@ -4337,7 +4337,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -4658,7 +4658,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -5040,7 +5040,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,
@ -5072,7 +5072,7 @@
"b": 268.20489999999995,
"coord_origin": "TOPLEFT"
},
"confidence": 0.987092912197113,
"confidence": 0.9870928525924683,
"cells": [
{
"index": 0,
@ -5393,7 +5393,7 @@
"b": 532.05774,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9494234323501587,
"confidence": 0.9494236707687378,
"cells": [
{
"index": 12,
@ -5729,7 +5729,7 @@
"id": 2,
"label": "picture",
"bbox": {
"l": 218.81556701660156,
"l": 218.8155517578125,
"t": 278.0153503417969,
"r": 391.96246337890625,
"b": 508.89410400390625,

View File

@ -326,8 +326,8 @@
]
}
],
"orig": "Let\u2019s introduce a list",
"text": "Let\u2019s introduce a list"
"orig": "Lets introduce a list",
"text": "Lets introduce a list"
},
{
"self_ref": "#/texts/4",

View File

@ -8589,7 +8589,7 @@
{
"page_no": 1,
"bbox": {
"l": 33.09052658081055,
"l": 33.09040069580078,
"t": 498.9671630859375,
"r": 585.1502075195312,
"b": 89.5469970703125,
@ -8683,9 +8683,9 @@
{
"page_no": 3,
"bbox": {
"l": 64.16704559326172,
"l": 64.1669921875,
"t": 188.49365234375,
"r": 258.77435302734375,
"r": 258.7742919921875,
"b": 103.87176513671875,
"coord_origin": "BOTTOMLEFT"
},
@ -8743,7 +8743,7 @@
{
"page_no": 4,
"bbox": {
"l": 145.41448974609375,
"l": 145.4144744873047,
"t": 264.7552490234375,
"r": 252.08840942382812,
"b": 156.616943359375,
@ -8773,10 +8773,10 @@
{
"page_no": 5,
"bbox": {
"l": 32.075260162353516,
"t": 721.4226608276367,
"l": 32.075252532958984,
"t": 721.4226226806641,
"r": 239.620361328125,
"b": 554.0421142578125,
"b": 554.0420684814453,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -8996,7 +8996,7 @@
"page_no": 10,
"bbox": {
"l": 135.97177124023438,
"t": 684.5892562866211,
"t": 684.5892486572266,
"r": 545.4180908203125,
"b": 381.39068603515625,
"coord_origin": "BOTTOMLEFT"
@ -9063,9 +9063,9 @@
{
"page_no": 11,
"bbox": {
"l": 135.64834594726562,
"t": 407.8263244628906,
"r": 301.23675537109375,
"l": 135.64837646484375,
"t": 407.8262939453125,
"r": 301.2367248535156,
"b": 197.24334716796875,
"coord_origin": "BOTTOMLEFT"
},
@ -9101,10 +9101,10 @@
{
"page_no": 14,
"bbox": {
"l": 63.80195617675781,
"t": 696.6176071166992,
"r": 547.1146850585938,
"b": 621.9679107666016,
"l": 63.801902770996094,
"t": 696.6175842285156,
"r": 547.11474609375,
"b": 621.9678497314453,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -9139,7 +9139,7 @@
{
"page_no": 14,
"bbox": {
"l": 63.9850959777832,
"l": 63.985130310058594,
"t": 364.09503173828125,
"r": 530.0478515625,
"b": 145.8603515625,
@ -9178,9 +9178,9 @@
"page_no": 15,
"bbox": {
"l": 136.5016632080078,
"t": 672.7509078979492,
"t": 672.7508773803711,
"r": 545.4508666992188,
"b": 314.45880126953125,
"b": 314.4587707519531,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -9322,10 +9322,10 @@
{
"page_no": 2,
"bbox": {
"l": 136.1495819091797,
"t": 659.9669189453125,
"r": 547.52685546875,
"b": 76.3485107421875,
"l": 136.1496124267578,
"t": 659.9669647216797,
"r": 547.5267944335938,
"b": 76.34844970703125,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -12490,9 +12490,9 @@
"page_no": 8,
"bbox": {
"l": 135.52462768554688,
"t": 502.2746887207031,
"t": 502.2747802734375,
"r": 545.8714599609375,
"b": 349.94940185546875,
"b": 349.949462890625,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
@ -13115,7 +13115,7 @@
"page_no": 9,
"bbox": {
"l": 64.41139221191406,
"t": 398.3863525390625,
"t": 398.3863830566406,
"r": 547.3950805664062,
"b": 70.39208984375,
"coord_origin": "BOTTOMLEFT"
@ -15731,7 +15731,7 @@
{
"page_no": 12,
"bbox": {
"l": 63.55635070800781,
"l": 63.55636978149414,
"t": 687.7661285400391,
"r": 548.5687255859375,
"b": 495.77532958984375,

File diff suppressed because it is too large Load Diff

View File

@ -1171,7 +1171,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -1311,7 +1311,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,
@ -1971,7 +1971,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -2117,7 +2117,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,
@ -2770,7 +2770,7 @@
"b": 295.08200000000005,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9547481536865234,
"confidence": 0.9547483325004578,
"cells": [
{
"index": 17,
@ -2916,7 +2916,7 @@
"b": 350.522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9672118425369263,
"confidence": 0.9672117233276367,
"cells": [
{
"index": 22,

View File

@ -5951,7 +5951,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -7406,7 +7406,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -8046,7 +8046,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
}
@ -10042,7 +10042,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -11509,7 +11509,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -12155,7 +12155,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
},
@ -14148,7 +14148,7 @@
"b": 465.596681609368,
"coord_origin": "TOPLEFT"
},
"confidence": 0.93938809633255,
"confidence": 0.9393879771232605,
"cells": [
{
"index": 77,
@ -15615,7 +15615,7 @@
"b": 534.1167018462124,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5769621729850769,
"confidence": 0.5769620537757874,
"cells": [
{
"index": 134,
@ -16261,7 +16261,7 @@
"b": 650.6431884765625,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6444893479347229,
"confidence": 0.6444889903068542,
"cells": [],
"children": []
},

View File

@ -1102,7 +1102,7 @@
"page_no": 1,
"bbox": {
"l": 388.5767822265625,
"t": 806.0040969848633,
"t": 806.0041046142578,
"r": 482.4759216308594,
"b": 739.034423828125,
"coord_origin": "BOTTOMLEFT"

View File

@ -1391,7 +1391,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -1477,7 +1477,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -1517,7 +1517,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,
@ -1597,7 +1597,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -2361,7 +2361,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -2891,7 +2891,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -2931,7 +2931,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -2971,7 +2971,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -3011,7 +3011,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -3051,7 +3051,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -3131,7 +3131,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -3236,7 +3236,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -3318,7 +3318,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -3822,7 +3822,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -3862,7 +3862,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -3902,7 +3902,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -3942,7 +3942,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -3982,7 +3982,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -4062,7 +4062,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -4102,7 +4102,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -4142,7 +4142,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -4182,7 +4182,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -4247,7 +4247,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -4287,7 +4287,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -4352,7 +4352,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -4432,7 +4432,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -4484,7 +4484,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -4580,7 +4580,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -4626,7 +4626,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,
@ -4718,7 +4718,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -5482,7 +5482,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -6012,7 +6012,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -6052,7 +6052,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -6092,7 +6092,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -6132,7 +6132,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -6172,7 +6172,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -6252,7 +6252,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -6357,7 +6357,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -6451,7 +6451,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -6955,7 +6955,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -6995,7 +6995,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -7035,7 +7035,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -7075,7 +7075,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -7115,7 +7115,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -7195,7 +7195,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -7235,7 +7235,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -7275,7 +7275,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -7315,7 +7315,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -7380,7 +7380,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -7420,7 +7420,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -7485,7 +7485,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -7565,7 +7565,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -7610,7 +7610,7 @@
"label": "picture",
"bbox": {
"l": 388.5767822265625,
"t": 36.03588104248047,
"t": 36.03587341308594,
"r": 482.4759216308594,
"b": 103.00555419921875,
"coord_origin": "TOPLEFT"
@ -7706,7 +7706,7 @@
"b": 81.03008981017001,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6917959451675415,
"confidence": 0.6917961239814758,
"cells": [
{
"index": 2,
@ -7798,7 +7798,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6362582445144653,
"confidence": 0.6362584233283997,
"cells": [
{
"index": 5,
@ -8562,7 +8562,7 @@
"b": 179.2998695799522,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7258322834968567,
"confidence": 0.7258325815200806,
"cells": [
{
"index": 5,
@ -9092,7 +9092,7 @@
"b": 233.17986945372706,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8121819496154785,
"confidence": 0.8121814727783203,
"cells": [
{
"index": 25,
@ -9132,7 +9132,7 @@
"b": 228.73998946412837,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7672221660614014,
"confidence": 0.7672220468521118,
"cells": [
{
"index": 26,
@ -9172,7 +9172,7 @@
"b": 255.88982940052404,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8320454955101013,
"confidence": 0.8320456743240356,
"cells": [
{
"index": 27,
@ -9212,7 +9212,7 @@
"b": 251.44994941092557,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5538817644119263,
"confidence": 0.5538824796676636,
"cells": [
{
"index": 28,
@ -9252,7 +9252,7 @@
"b": 278.5698293473914,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7908995151519775,
"confidence": 0.7909000515937805,
"cells": [
{
"index": 29,
@ -9332,7 +9332,7 @@
"b": 323.44982924225053,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6534579396247864,
"confidence": 0.6534578204154968,
"cells": [
{
"index": 31,
@ -9437,7 +9437,7 @@
"b": 296.80999930466,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5417138934135437,
"confidence": 0.5417144298553467,
"cells": [
{
"index": 35,
@ -9531,7 +9531,7 @@
"b": 596.0198686036978,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7191378474235535,
"confidence": 0.719137966632843,
"cells": [
{
"index": 37,
@ -10035,7 +10035,7 @@
"b": 386.56997909437825,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8262876868247986,
"confidence": 0.8262879252433777,
"cells": [
{
"index": 38,
@ -10075,7 +10075,7 @@
"b": 413.70983903079747,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7766715884208679,
"confidence": 0.7766718864440918,
"cells": [
{
"index": 39,
@ -10115,7 +10115,7 @@
"b": 409.26995904119883,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8204739093780518,
"confidence": 0.8204737901687622,
"cells": [
{
"index": 40,
@ -10155,7 +10155,7 @@
"b": 436.3898589776647,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7670676708221436,
"confidence": 0.7670677900314331,
"cells": [
{
"index": 41,
@ -10195,7 +10195,7 @@
"b": 432.0699789877849,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8048339486122131,
"confidence": 0.8048340082168579,
"cells": [
{
"index": 42,
@ -10275,7 +10275,7 @@
"b": 454.7499689346523,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8273372054100037,
"confidence": 0.827337384223938,
"cells": [
{
"index": 44,
@ -10315,7 +10315,7 @@
"b": 481.8698388711183,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7342240214347839,
"confidence": 0.7342236638069153,
"cells": [
{
"index": 45,
@ -10355,7 +10355,7 @@
"b": 477.42995888151955,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8411223888397217,
"confidence": 0.8411222696304321,
"cells": [
{
"index": 46,
@ -10395,7 +10395,7 @@
"b": 528.3098487623228,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7251590490341187,
"confidence": 0.7251589894294739,
"cells": [
{
"index": 47,
@ -10460,7 +10460,7 @@
"b": 501.78997882445117,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7848676443099976,
"confidence": 0.7848678827285767,
"cells": [
{
"index": 49,
@ -10500,7 +10500,7 @@
"b": 573.2198486571116,
"coord_origin": "TOPLEFT"
},
"confidence": 0.758643627166748,
"confidence": 0.7586438059806824,
"cells": [
{
"index": 50,
@ -10565,7 +10565,7 @@
"b": 546.69997871924,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7897858619689941,
"confidence": 0.7897851467132568,
"cells": [
{
"index": 52,
@ -10645,7 +10645,7 @@
"b": 591.5799886140991,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8144806027412415,
"confidence": 0.8144810795783997,
"cells": [
{
"index": 54,
@ -10695,7 +10695,7 @@
"b": 790.0379791491694,
"coord_origin": "TOPLEFT"
},
"confidence": 0.8992282152175903,
"confidence": 0.899228036403656,
"cells": [
{
"index": 3,

View File

@ -74,6 +74,12 @@
"prov": [],
"orig": "Hello world1",
"text": "Hello world1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -88,6 +94,12 @@
"prov": [],
"orig": "Hello2",
"text": "Hello2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -113,7 +125,13 @@
"label": "paragraph",
"prov": [],
"orig": "Some text before",
"text": "Some text before"
"text": "Some text before",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/4",
@ -149,7 +167,13 @@
"label": "paragraph",
"prov": [],
"orig": "Some text after",
"text": "Some text after"
"text": "Some text after",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
}
],
"pictures": [],

View File

@ -55,7 +55,13 @@
"label": "paragraph",
"prov": [],
"orig": "Test with three images in unusual formats",
"text": "Test with three images in unusual formats"
"text": "Test with three images in unusual formats",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/1",
@ -67,7 +73,13 @@
"label": "paragraph",
"prov": [],
"orig": "Raster in emf:",
"text": "Raster in emf:"
"text": "Raster in emf:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/2",
@ -79,7 +91,13 @@
"label": "paragraph",
"prov": [],
"orig": "Vector in emf:",
"text": "Vector in emf:"
"text": "Vector in emf:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/3",
@ -91,7 +109,13 @@
"label": "paragraph",
"prov": [],
"orig": "Raster in webp:",
"text": "Raster in webp:"
"text": "Raster in webp:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
}
],
"pictures": [

View File

@ -232,6 +232,12 @@
"prov": [],
"orig": "hyperlink",
"text": "hyperlink",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"hyperlink": "https:/github.com/DS4SD/docling"
},
{
@ -263,7 +269,13 @@
"label": "paragraph",
"prov": [],
"orig": "Normal",
"text": "Normal"
"text": "Normal",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/6",
@ -329,7 +341,13 @@
"label": "paragraph",
"prov": [],
"orig": "and",
"text": "and"
"text": "and",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/10",
@ -342,6 +360,12 @@
"prov": [],
"orig": "hyperlink",
"text": "hyperlink",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"hyperlink": "https:/github.com/DS4SD/docling"
},
{
@ -354,7 +378,13 @@
"label": "paragraph",
"prov": [],
"orig": "on the same line",
"text": "on the same line"
"text": "on the same line",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/12",
@ -439,6 +469,12 @@
"prov": [],
"orig": "Some",
"text": "Some",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -513,6 +549,12 @@
"prov": [],
"orig": "Nested",
"text": "Nested",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},

View File

@ -133,7 +133,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1",
"text": "Paragraph 1.1"
"text": "Paragraph 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/5",
@ -157,7 +163,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2",
"text": "Paragraph 1.2"
"text": "Paragraph 1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/7",
@ -222,7 +234,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1"
"text": "Paragraph 1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/11",
@ -246,7 +264,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2"
"text": "Paragraph 1.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/13",
@ -314,7 +338,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1"
"text": "Paragraph 1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/17",
@ -338,7 +368,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2"
"text": "Paragraph 1.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/19",
@ -406,7 +442,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1"
"text": "Paragraph 1.2.3.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/23",
@ -430,7 +472,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1"
"text": "Paragraph 1.2.3.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/25",
@ -513,7 +561,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1",
"text": "Paragraph 2.1"
"text": "Paragraph 2.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/30",
@ -537,7 +591,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.2",
"text": "Paragraph 2.2"
"text": "Paragraph 2.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/32",
@ -602,7 +662,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1"
"text": "Paragraph 2.1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/36",
@ -626,7 +692,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1"
"text": "Paragraph 2.1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/38",
@ -694,7 +766,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1"
"text": "Paragraph 2.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/42",
@ -718,7 +796,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2"
"text": "Paragraph 2.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/44",

View File

@ -209,7 +209,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1",
"text": "Paragraph 1.1"
"text": "Paragraph 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/5",
@ -233,7 +239,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2",
"text": "Paragraph 1.2"
"text": "Paragraph 1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/7",
@ -298,7 +310,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1"
"text": "Paragraph 1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/11",
@ -322,7 +340,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2"
"text": "Paragraph 1.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/13",
@ -390,7 +414,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.1",
"text": "Paragraph 1.1.1"
"text": "Paragraph 1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/17",
@ -414,7 +444,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.1.2",
"text": "Paragraph 1.1.2"
"text": "Paragraph 1.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/19",
@ -482,7 +518,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1"
"text": "Paragraph 1.2.3.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/23",
@ -506,7 +548,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 1.2.3.1",
"text": "Paragraph 1.2.3.1"
"text": "Paragraph 1.2.3.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/25",
@ -567,7 +615,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1",
"text": "Paragraph 2.1"
"text": "Paragraph 2.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/30",
@ -591,7 +645,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.2",
"text": "Paragraph 2.2"
"text": "Paragraph 2.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/32",
@ -656,7 +716,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1"
"text": "Paragraph 2.1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/36",
@ -680,7 +746,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1.1",
"text": "Paragraph 2.1.1.1"
"text": "Paragraph 2.1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/38",
@ -748,7 +820,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1"
"text": "Paragraph 2.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/42",
@ -772,7 +850,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2"
"text": "Paragraph 2.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/44",

View File

@ -365,7 +365,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.1",
"text": "Paragraph 2.1.1"
"text": "Paragraph 2.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/4",
@ -389,7 +395,13 @@
"label": "paragraph",
"prov": [],
"orig": "Paragraph 2.1.2",
"text": "Paragraph 2.1.2"
"text": "Paragraph 2.1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/6",
@ -434,6 +446,12 @@
"prov": [],
"orig": "List item 1",
"text": "List item 1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -448,6 +466,12 @@
"prov": [],
"orig": "List item 2",
"text": "List item 2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -462,6 +486,12 @@
"prov": [],
"orig": "List item 3",
"text": "List item 3",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -508,6 +538,12 @@
"prov": [],
"orig": "List item a",
"text": "List item a",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -522,6 +558,12 @@
"prov": [],
"orig": "List item b",
"text": "List item b",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -536,6 +578,12 @@
"prov": [],
"orig": "List item c",
"text": "List item c",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -582,6 +630,12 @@
"prov": [],
"orig": "List item 1",
"text": "List item 1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -596,6 +650,12 @@
"prov": [],
"orig": "List item 2",
"text": "List item 2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -610,6 +670,12 @@
"prov": [],
"orig": "List item 1.1",
"text": "List item 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -624,6 +690,12 @@
"prov": [],
"orig": "List item 1.2",
"text": "List item 1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -638,6 +710,12 @@
"prov": [],
"orig": "List item 1.3",
"text": "List item 1.3",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -652,6 +730,12 @@
"prov": [],
"orig": "List item 3",
"text": "List item 3",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -698,6 +782,12 @@
"prov": [],
"orig": "List item 1",
"text": "List item 1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -712,6 +802,12 @@
"prov": [],
"orig": "List item 1.1",
"text": "List item 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -726,6 +822,12 @@
"prov": [],
"orig": "List item 2",
"text": "List item 2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -772,6 +874,12 @@
"prov": [],
"orig": "List item 1",
"text": "List item 1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -786,6 +894,12 @@
"prov": [],
"orig": "List item 1.1",
"text": "List item 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -800,6 +914,12 @@
"prov": [],
"orig": "List item 1.1.1",
"text": "List item 1.1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -814,6 +934,12 @@
"prov": [],
"orig": "List item 3",
"text": "List item 3",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -866,6 +992,12 @@
"prov": [],
"orig": "List item 1",
"text": "List item 1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -880,6 +1012,12 @@
"prov": [],
"orig": "List item 2",
"text": "List item 2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -894,6 +1032,12 @@
"prov": [],
"orig": "List item 1.1",
"text": "List item 1.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -908,6 +1052,12 @@
"prov": [],
"orig": "List item 1.2",
"text": "List item 1.2",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -922,6 +1072,12 @@
"prov": [],
"orig": "List item 1.2.1",
"text": "List item 1.2.1",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -936,6 +1092,12 @@
"prov": [],
"orig": "List item 3",
"text": "List item 3",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},

File diff suppressed because it is too large Load Diff

View File

@ -101,7 +101,13 @@
"label": "paragraph",
"prov": [],
"orig": "Summer activities",
"text": "Summer activities"
"text": "Summer activities",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/1",
@ -138,7 +144,13 @@
"label": "paragraph",
"prov": [],
"orig": "Duck",
"text": "Duck"
"text": "Duck",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/3",
@ -150,7 +162,13 @@
"label": "paragraph",
"prov": [],
"orig": "Figure 1: This is a cute duckling",
"text": "Figure 1: This is a cute duckling"
"text": "Figure 1: This is a cute duckling",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/4",
@ -180,8 +198,8 @@
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Let\u2019s swim!",
"text": "Let\u2019s swim!",
"orig": "Lets swim!",
"text": "Lets swim!",
"level": 1
},
{
@ -194,7 +212,13 @@
"label": "paragraph",
"prov": [],
"orig": "To get started with swimming, first lay down in a water and try not to drown:",
"text": "To get started with swimming, first lay down in a water and try not to drown:"
"text": "To get started with swimming, first lay down in a water and try not to drown:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/6",
@ -207,6 +231,12 @@
"prov": [],
"orig": "You can relax and look around",
"text": "You can relax and look around",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -221,6 +251,12 @@
"prov": [],
"orig": "Paddle about",
"text": "Paddle about",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -235,6 +271,12 @@
"prov": [],
"orig": "Enjoy summer warmth",
"text": "Enjoy summer warmth",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -247,8 +289,14 @@
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Also, don\u2019t forget:",
"text": "Also, don\u2019t forget:"
"orig": "Also, dont forget:",
"text": "Also, dont forget:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/10",
@ -261,6 +309,12 @@
"prov": [],
"orig": "Wear sunglasses",
"text": "Wear sunglasses",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -273,8 +327,14 @@
"content_layer": "body",
"label": "list_item",
"prov": [],
"orig": "Don\u2019t forget to drink water",
"text": "Don\u2019t forget to drink water",
"orig": "Dont forget to drink water",
"text": "Dont forget to drink water",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -289,6 +349,12 @@
"prov": [],
"orig": "Use sun cream",
"text": "Use sun cream",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -301,8 +367,14 @@
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "Hmm, what else\u2026",
"text": "Hmm, what else\u2026"
"orig": "Hmm, what else…",
"text": "Hmm, what else…",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/14",
@ -335,8 +407,8 @@
"content_layer": "body",
"label": "section_header",
"prov": [],
"orig": "Let\u2019s eat",
"text": "Let\u2019s eat",
"orig": "Lets eat",
"text": "Lets eat",
"level": 2
},
{
@ -348,8 +420,14 @@
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice",
"text": "After we had a good day of swimming in the lake, it\u2019s important to eat something nice"
"orig": "After we had a good day of swimming in the lake, its important to eat something nice",
"text": "After we had a good day of swimming in the lake, its important to eat something nice",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/16",
@ -361,7 +439,13 @@
"label": "paragraph",
"prov": [],
"orig": "I like to eat leaves",
"text": "I like to eat leaves"
"text": "I like to eat leaves",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/17",
@ -373,7 +457,13 @@
"label": "paragraph",
"prov": [],
"orig": "Here are some interesting things a respectful duck could eat:",
"text": "Here are some interesting things a respectful duck could eat:"
"text": "Here are some interesting things a respectful duck could eat:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/18",
@ -396,8 +486,14 @@
"content_layer": "body",
"label": "paragraph",
"prov": [],
"orig": "And let\u2019s add another list in the end:",
"text": "And let\u2019s add another list in the end:"
"orig": "And lets add another list in the end:",
"text": "And lets add another list in the end:",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/20",
@ -410,6 +506,12 @@
"prov": [],
"orig": "Leaves",
"text": "Leaves",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -424,6 +526,12 @@
"prov": [],
"orig": "Berries",
"text": "Berries",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
},
@ -438,6 +546,12 @@
"prov": [],
"orig": "Grain",
"text": "Grain",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
},
"enumerated": false,
"marker": "-"
}

View File

@ -114,7 +114,13 @@
"label": "paragraph",
"prov": [],
"orig": "A uniform table",
"text": "A uniform table"
"text": "A uniform table",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/2",
@ -138,7 +144,13 @@
"label": "paragraph",
"prov": [],
"orig": "A non-uniform table with horizontal spans",
"text": "A non-uniform table with horizontal spans"
"text": "A non-uniform table with horizontal spans",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/4",
@ -162,7 +174,13 @@
"label": "paragraph",
"prov": [],
"orig": "A non-uniform table with horizontal spans in inner columns",
"text": "A non-uniform table with horizontal spans in inner columns"
"text": "A non-uniform table with horizontal spans in inner columns",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/6",
@ -186,7 +204,13 @@
"label": "paragraph",
"prov": [],
"orig": "A non-uniform table with vertical spans",
"text": "A non-uniform table with vertical spans"
"text": "A non-uniform table with vertical spans",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/8",
@ -210,7 +234,13 @@
"label": "paragraph",
"prov": [],
"orig": "A non-uniform table with all kinds of spans and empty cells",
"text": "A non-uniform table with all kinds of spans and empty cells"
"text": "A non-uniform table with all kinds of spans and empty cells",
"formatting": {
"bold": false,
"italic": false,
"underline": false,
"strikethrough": false
}
},
{
"self_ref": "#/texts/10",

View File

@ -1,2 +1,2 @@
<doctag><text><loc_58><loc_44><loc_426><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
</doctag>

View File

@ -1 +1,77 @@
{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.0, "t": 767.2550252278646, "r": 506.6666666666667, "b": 688.5883585611979, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}}
{
"schema_name": "DoclingDocument",
"version": "1.3.0",
"name": "webp-test",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 16115062463007057787,
"filename": "webp-test.webp",
"uri": null
},
"furniture": {
"self_ref": "#/furniture",
"parent": null,
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"parent": null,
"children": [
{
"cref": "#/texts/0"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 238.19302423176944,
"t": 2570.0959833241664,
"r": 1696.0985546594009,
"b": 2315.204273887442,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
94
]
}
],
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package",
"formatting": null,
"hyperlink": null
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 2000.0,
"height": 2829.0
},
"image": null,
"page_no": 1
}
}
}

File diff suppressed because one or more lines are too long

View File

@ -44,9 +44,9 @@
"prov": [
{
"bbox": [
69.6796630536824,
689.0124221922704,
504.8720051760782,
70.90211866351085,
689.216658542347,
504.8720079864275,
764.9216921155637
],
"page": 1,

View File

@ -40,14 +40,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -90,13 +90,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -132,14 +132,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -157,14 +157,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -195,13 +195,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -237,14 +237,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -262,14 +262,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -293,13 +293,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -335,14 +335,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -360,14 +360,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",

View File

@ -0,0 +1,3 @@
<document>
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph>
</document>

View File

@ -0,0 +1 @@
{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "ocr_test_rotated.pdf", "filename-prov": null, "document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6", "#-pages": 1, "collection-name": null, "description": null, "page-hashes": [{"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [131.21306574279092, 74.12495603322407, 152.19606490864376, 154.19400205373182], "page": 1, "span": [0, 7], "__ref_s3_data": null}], "text": "package", "type": "paragraph", "payload": null, "name": "Text", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 595.201171875, "page": 1, "width": 841.9216918945312}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null}

View File

@ -0,0 +1 @@
package

View File

@ -0,0 +1 @@
[{"page_no": 0, "size": {"width": 841.9216918945312, "height": 595.201171875}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 
89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "body": [{"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}, "confidence": 0.5234212875366211, "cells": [{"id": 2, "text": "package", "bbox": {"l": 131.21306574279092, "t": 441.0071698212682, "r": 152.19606490864376, "b": 521.0762158417759, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "package"}], "headers": [{"label": "page_header", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "page_header", "bbox": {"l": 77.10171546422428, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}, "confidence": 0.6016772389411926, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 77.10171546422428, "t": 89.23887398109309, "r": 
96.6831586150625, "b": 520.7638577050515, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 100.55299576256091, "t": 89.12381765643227, "r": 124.91101654503161, "b": 523.3155494272656, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"}]}}]

View File

@ -0,0 +1,4 @@
<document>
<paragraph><location><page_1><loc_74><loc_16><loc_88><loc_18></location>package</paragraph>
<paragraph><location><page_1><loc_15><loc_9><loc_88><loc_15></location>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</paragraph>
</document>

View File

@ -0,0 +1,106 @@
{
"_name": "",
"type": "pdf-document",
"description": {
"title": null,
"abstract": null,
"authors": null,
"affiliations": null,
"subjects": null,
"keywords": null,
"publication_date": null,
"languages": null,
"license": null,
"publishers": null,
"url_refs": null,
"references": null,
"publication": null,
"reference_count": null,
"citation_count": null,
"citation_date": null,
"advanced": null,
"analytics": null,
"logs": [],
"collection": null,
"acquisition": null
},
"file-info": {
"filename": "ocr_test_rotated_180.pdf",
"filename-prov": null,
"document-hash": "a9cbfe0f2a71171face9ee31d2347ca4195649670ad75680520d67d4a863f982",
"#-pages": 1,
"collection-name": null,
"description": null,
"page-hashes": [
{
"hash": "baca27070f05dd84cf0903ded39bcf0fc1fa6ef0ac390e79cf8ba90c8c33ba49",
"model": "default",
"page": 1
}
]
},
"main-text": [
{
"prov": [
{
"bbox": [
441.304584329099,
132.09610360960653,
521.9863114205704,
151.67751306395223
],
"page": 1,
"span": [
0,
7
],
"__ref_s3_data": null
}
],
"text": "package",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
},
{
"prov": [
{
"bbox": [
89.12133215549848,
77.02339849621205,
523.3501733013318,
124.86176457554109
],
"page": 1,
"span": [
0,
86
],
"__ref_s3_data": null
}
],
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
}
],
"figures": [],
"tables": [],
"bitmaps": null,
"equations": [],
"footnotes": [],
"page-dimensions": [
{
"height": 841.9216918945312,
"page": 1,
"width": 595.201171875
}
],
"page-footers": [],
"page-headers": [],
"_s3_data": null,
"identifiers": null
}

View File

@ -0,0 +1,3 @@
package
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained

View File

@ -0,0 +1,445 @@
[
{
"page_no": 0,
"size": {
"width": 595.201171875,
"height": 841.9216918945312
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "text",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 2,
"page_no": 0,
"cluster": {
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 2,
"page_no": 0,
"cluster": {
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": []
}
}
]

View File

@ -0,0 +1,3 @@
<document>
<paragraph><location><page_1><loc_82><loc_74><loc_84><loc_88></location>package</paragraph>
</document>

View File

@ -0,0 +1,83 @@
{
"_name": "",
"type": "pdf-document",
"description": {
"title": null,
"abstract": null,
"authors": null,
"affiliations": null,
"subjects": null,
"keywords": null,
"publication_date": null,
"languages": null,
"license": null,
"publishers": null,
"url_refs": null,
"references": null,
"publication": null,
"reference_count": null,
"citation_count": null,
"citation_date": null,
"advanced": null,
"analytics": null,
"logs": [],
"collection": null,
"acquisition": null
},
"file-info": {
"filename": "ocr_test_rotated_270.pdf",
"filename-prov": null,
"document-hash": "52f54e7183bdb73aa3713c7b169baca93e276963a138418c26e7d6a1ea128f14",
"#-pages": 1,
"collection-name": null,
"description": null,
"page-hashes": [
{
"hash": "59bc9ddba89e7b008185dd16d384493beb034686e5670546786390c5d237a304",
"model": "default",
"page": 1
}
]
},
"main-text": [
{
"prov": [
{
"bbox": [
691.4680194659409,
442.3948768148814,
709.8255850278712,
523.0765988200898
],
"page": 1,
"span": [
0,
7
],
"__ref_s3_data": null
}
],
"text": "package",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
}
],
"figures": [],
"tables": [],
"bitmaps": null,
"equations": [],
"footnotes": [],
"page-dimensions": [
{
"height": 595.201171875,
"page": 1,
"width": 841.9216918945312
}
],
"page-footers": [],
"page-headers": [],
"_s3_data": null,
"identifiers": null
}

View File

@ -0,0 +1 @@
package

View File

@ -0,0 +1,446 @@
[
{
"page_no": 0,
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 8,
"page_no": 0,
"cluster": {
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 8,
"page_no": 0,
"cluster": {
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
}
]
}
}
]

View File

@ -0,0 +1,3 @@
<document>
<paragraph><location><page_1><loc_16><loc_12><loc_18><loc_26></location>package</paragraph>
</document>

View File

@ -0,0 +1,83 @@
{
"_name": "",
"type": "pdf-document",
"description": {
"title": null,
"abstract": null,
"authors": null,
"affiliations": null,
"subjects": null,
"keywords": null,
"publication_date": null,
"languages": null,
"license": null,
"publishers": null,
"url_refs": null,
"references": null,
"publication": null,
"reference_count": null,
"citation_count": null,
"citation_date": null,
"advanced": null,
"analytics": null,
"logs": [],
"collection": null,
"acquisition": null
},
"file-info": {
"filename": "ocr_test_rotated_90.pdf",
"filename-prov": null,
"document-hash": "4a282813d93824eaa9bc2a0b2a0d6d626ecc8f5f380bd1320e2dd3e8e53c2ba6",
"#-pages": 1,
"collection-name": null,
"description": null,
"page-hashes": [
{
"hash": "f8a4dc72d8b159f69d0bc968b97f3fb9e0ac59dcb3113492432755835935d9b3",
"model": "default",
"page": 1
}
]
},
"main-text": [
{
"prov": [
{
"bbox": [
131.21306574279092,
74.12495603322407,
152.19606490864376,
154.19400205373182
],
"page": 1,
"span": [
0,
7
],
"__ref_s3_data": null
}
],
"text": "package",
"type": "paragraph",
"payload": null,
"name": "Text",
"font": null
}
],
"figures": [],
"tables": [],
"bitmaps": null,
"equations": [],
"footnotes": [],
"page-dimensions": [
{
"height": 595.201171875,
"page": 1,
"width": 841.9216918945312
}
],
"page-footers": [],
"page-headers": [],
"_s3_data": null,
"identifiers": null
}

View File

@ -0,0 +1 @@
package

View File

@ -0,0 +1,446 @@
[
{
"page_no": 0,
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 1,
"page_no": 0,
"cluster": {
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 1,
"page_no": 0,
"cluster": {
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
}
]
}
}
]

View File

@ -1,2 +1,2 @@
<doctag><text><loc_59><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
<doctag><text><loc_60><loc_46><loc_424><loc_91>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package</text>
</doctag>

View File

@ -42,10 +42,10 @@
{
"page_no": 1,
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 764.9216921155637,
"r": 504.8720051760782,
"b": 689.0124221922704,
"r": 504.8720079864275,
"b": 689.216658542347,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [

View File

@ -40,14 +40,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -65,14 +65,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -90,13 +90,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -132,14 +132,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -157,14 +157,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -195,13 +195,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -237,14 +237,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -262,14 +262,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",
@ -293,13 +293,13 @@
"id": 0,
"label": "text",
"bbox": {
"l": 69.6796630536824,
"l": 70.90211866351085,
"t": 76.99999977896756,
"r": 504.8720051760782,
"b": 152.90926970226084,
"r": 504.8720079864275,
"b": 152.70503335218433,
"coord_origin": "TOPLEFT"
},
"confidence": 0.9715732336044312,
"confidence": 0.9715733528137207,
"cells": [
{
"index": 0,
@ -335,14 +335,14 @@
"a": 255
},
"rect": {
"r_x0": 69.6796630536824,
"r_y0": 124.83139494707741,
"r_x1": 504.8720051760782,
"r_y1": 124.83139494707741,
"r_x2": 504.8720051760782,
"r_y2": 104.00000011573796,
"r_x3": 69.6796630536824,
"r_y3": 104.00000011573796,
"r_x0": 70.90211866351085,
"r_y0": 124.83139551297342,
"r_x1": 504.8720079864275,
"r_y1": 124.83139551297342,
"r_x2": 504.8720079864275,
"r_y2": 102.66666671251768,
"r_x3": 70.90211866351085,
"r_y3": 102.66666671251768,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
@ -360,14 +360,14 @@
"a": 255
},
"rect": {
"r_x0": 71.84193505100733,
"r_y0": 152.90926970226084,
"r_x1": 153.088934155825,
"r_y1": 152.90926970226084,
"r_x2": 153.088934155825,
"r_y2": 129.797125232046,
"r_x3": 71.84193505100733,
"r_y3": 129.797125232046,
"r_x0": 73.10852522817731,
"r_y0": 152.70503335218433,
"r_x1": 153.04479435252625,
"r_y1": 152.70503335218433,
"r_x2": 153.04479435252625,
"r_y2": 130.00136157890958,
"r_x3": 73.10852522817731,
"r_y3": 130.00136157890958,
"coord_origin": "TOPLEFT"
},
"text": "package",

View File

@ -0,0 +1,3 @@
<doctag><text><loc_371><loc_410><loc_438><loc_422>package</text>
<text><loc_75><loc_426><loc_440><loc_454>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</text>
</doctag>

View File

@ -0,0 +1,109 @@
{
"schema_name": "DoclingDocument",
"version": "1.3.0",
"name": "ocr_test_rotated_180",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 2530576989861832966,
"filename": "ocr_test_rotated_180.pdf",
"uri": null
},
"furniture": {
"self_ref": "#/furniture",
"parent": null,
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"parent": null,
"children": [
{
"cref": "#/texts/0"
},
{
"cref": "#/texts/1"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 441.304584329099,
"t": 151.67751306395223,
"r": 521.9863114205704,
"b": 132.09610360960653,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
7
]
}
],
"orig": "package",
"text": "package",
"formatting": null,
"hyperlink": null
},
{
"self_ref": "#/texts/1",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 89.12133215549848,
"t": 124.86176457554109,
"r": 523.3501733013318,
"b": 77.02339849621205,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
86
]
}
],
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"formatting": null,
"hyperlink": null
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 595.201171875,
"height": 841.9216918945312
},
"image": null,
"page_no": 1
}
}
}

View File

@ -0,0 +1,3 @@
package
Docling bundles PDF document conversion to JSON and Markdown in an easy self contained

View File

@ -0,0 +1,445 @@
[
{
"page_no": 0,
"size": {
"width": 595.201171875,
"height": 841.9216918945312
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "text",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 2,
"page_no": 0,
"cluster": {
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "text",
"bbox": {
"l": 89.12133215549848,
"t": 717.0599273189902,
"r": 523.3501733013318,
"b": 764.8982933983192,
"coord_origin": "TOPLEFT"
},
"confidence": 0.7318570613861084,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 90.46133071208328,
"r_y0": 764.8982933983192,
"r_x1": 520.7638616365624,
"r_y1": 764.8982933983192,
"r_x2": 520.7638616365624,
"r_y2": 744.0929853742306,
"r_x3": 90.46133071208328,
"r_y3": 744.0929853742306,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 89.12133215549848,
"r_y0": 741.5247710689902,
"r_x1": 523.3501733013318,
"r_y1": 741.5247710689902,
"r_x2": 523.3501733013318,
"r_y2": 717.0599273189902,
"r_x3": 89.12133215549848,
"r_y3": 717.0599273189902,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 2,
"page_no": 0,
"cluster": {
"id": 2,
"label": "text",
"bbox": {
"l": 441.304584329099,
"t": 690.244178830579,
"r": 521.9863114205704,
"b": 709.8255882849247,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5982133150100708,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 441.304584329099,
"r_y0": 709.8255882849247,
"r_x1": 521.9863114205704,
"r_y1": 709.8255882849247,
"r_x2": 521.9863114205704,
"r_y2": 690.244178830579,
"r_x3": 441.304584329099,
"r_y3": 690.244178830579,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": []
}
}
]

View File

@ -0,0 +1,3 @@
<doctag><page_header><loc_426><loc_60><loc_454><loc_424>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
<text><loc_411><loc_61><loc_422><loc_128>package</text>
</doctag>

View File

@ -0,0 +1,109 @@
{
"schema_name": "DoclingDocument",
"version": "1.3.0",
"name": "ocr_test_rotated_270",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 10890858393843077593,
"filename": "ocr_test_rotated_270.pdf",
"uri": null
},
"furniture": {
"self_ref": "#/furniture",
"parent": null,
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"parent": null,
"children": [
{
"cref": "#/texts/0"
},
{
"cref": "#/texts/1"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "furniture",
"label": "page_header",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 717.1685859527342,
"t": 524.2990548540179,
"r": 764.8982839673505,
"b": 90.32916553110118,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
86
]
}
],
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"formatting": null,
"hyperlink": null
},
{
"self_ref": "#/texts/1",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 691.4680194659409,
"t": 523.0765988200898,
"r": 709.8255850278712,
"b": 442.3948768148814,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
7
]
}
],
"orig": "package",
"text": "package",
"formatting": null,
"hyperlink": null
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"image": null,
"page_no": 1
}
}
}

View File

@ -0,0 +1 @@
package

View File

@ -0,0 +1,446 @@
[
{
"page_no": 0,
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 8,
"page_no": 0,
"cluster": {
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 8,
"page_no": 0,
"cluster": {
"id": 8,
"label": "text",
"bbox": {
"l": 691.4680194659409,
"t": 72.12457305491027,
"r": 709.8255850278712,
"b": 152.80629506011857,
"coord_origin": "TOPLEFT"
},
"confidence": 1.0,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 691.4680194659409,
"r_y0": 152.80629506011857,
"r_x1": 709.8255850278712,
"r_y1": 152.80629506011857,
"r_x2": 709.8255850278712,
"r_y2": 72.12457305491027,
"r_x3": 691.4680194659409,
"r_y3": 72.12457305491027,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 717.1685859527342,
"t": 70.90211702098213,
"r": 764.8982839673505,
"b": 504.8720063438988,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6915205121040344,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 744.0930045534915,
"r_y0": 504.87200373583954,
"r_x1": 764.8982839673505,
"r_y1": 504.87200373583954,
"r_x2": 764.8982839673505,
"r_y2": 73.34702001188118,
"r_x3": 744.0930045534915,
"r_y3": 73.34702001188118,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 717.1685859527342,
"r_y0": 504.8720063438988,
"r_x1": 737.9738558298501,
"r_y1": 504.8720063438988,
"r_x2": 737.9738558298501,
"r_y2": 70.90211702098213,
"r_x3": 717.1685859527342,
"r_y3": 70.90211702098213,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
}
]
}
}
]

View File

@ -0,0 +1,3 @@
<doctag><page_header><loc_46><loc_75><loc_74><loc_440>Docling bundles PDF document conversion to JSON and Markdown in an easy self contained</page_header>
<text><loc_78><loc_370><loc_90><loc_438>package</text>
</doctag>

View File

@ -0,0 +1,109 @@
{
"schema_name": "DoclingDocument",
"version": "1.3.0",
"name": "ocr_test_rotated_90",
"origin": {
"mimetype": "application/pdf",
"binary_hash": 6989291015361162334,
"filename": "ocr_test_rotated_90.pdf",
"uri": null
},
"furniture": {
"self_ref": "#/furniture",
"parent": null,
"children": [],
"content_layer": "furniture",
"name": "_root_",
"label": "unspecified"
},
"body": {
"self_ref": "#/body",
"parent": null,
"children": [
{
"cref": "#/texts/0"
},
{
"cref": "#/texts/1"
}
],
"content_layer": "body",
"name": "_root_",
"label": "unspecified"
},
"groups": [],
"texts": [
{
"self_ref": "#/texts/0",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "furniture",
"label": "page_header",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 77.10171546422428,
"t": 506.07735421856773,
"r": 124.91101654503161,
"b": 71.88562244773436,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
86
]
}
],
"orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained",
"formatting": null,
"hyperlink": null
},
{
"self_ref": "#/texts/1",
"parent": {
"cref": "#/body"
},
"children": [],
"content_layer": "body",
"label": "text",
"prov": [
{
"page_no": 1,
"bbox": {
"l": 131.21306574279092,
"t": 154.19400205373182,
"r": 152.19606490864376,
"b": 74.12495603322407,
"coord_origin": "BOTTOMLEFT"
},
"charspan": [
0,
7
]
}
],
"orig": "package",
"text": "package",
"formatting": null,
"hyperlink": null
}
],
"pictures": [],
"tables": [],
"key_value_items": [],
"form_items": [],
"pages": {
"1": {
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"image": null,
"page_no": 1
}
}
}

View File

@ -0,0 +1 @@
package

View File

@ -0,0 +1,446 @@
[
{
"page_no": 0,
"size": {
"width": 841.9216918945312,
"height": 595.201171875
},
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"parsed_page": null,
"predictions": {
"layout": {
"clusters": [
{
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
{
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
}
]
},
"tablestructure": {
"table_map": {}
},
"figures_classification": null,
"equations_prediction": null,
"vlm_response": null
},
"assembled": {
"elements": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
},
{
"label": "text",
"id": 1,
"page_no": 0,
"cluster": {
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"body": [
{
"label": "text",
"id": 1,
"page_no": 0,
"cluster": {
"id": 1,
"label": "text",
"bbox": {
"l": 131.21306574279092,
"t": 441.0071698212682,
"r": 152.19606490864376,
"b": 521.0762158417759,
"coord_origin": "TOPLEFT"
},
"confidence": 0.5234212875366211,
"cells": [
{
"index": 2,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 131.21306574279092,
"r_y0": 521.0762158417759,
"r_x1": 152.19606490864376,
"r_y1": 521.0762158417759,
"r_x2": 152.19606490864376,
"r_y2": 441.0071698212682,
"r_x3": 131.21306574279092,
"r_y3": 441.0071698212682,
"coord_origin": "TOPLEFT"
},
"text": "package",
"orig": "package",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "package"
}
],
"headers": [
{
"label": "page_header",
"id": 0,
"page_no": 0,
"cluster": {
"id": 0,
"label": "page_header",
"bbox": {
"l": 77.10171546422428,
"t": 89.12381765643227,
"r": 124.91101654503161,
"b": 523.3155494272656,
"coord_origin": "TOPLEFT"
},
"confidence": 0.6016772389411926,
"cells": [
{
"index": 0,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 77.10171546422428,
"r_y0": 520.7638577050515,
"r_x1": 96.6831586150625,
"r_y1": 520.7638577050515,
"r_x2": 96.6831586150625,
"r_y2": 89.23887398109309,
"r_x3": 77.10171546422428,
"r_y3": 89.23887398109309,
"coord_origin": "TOPLEFT"
},
"text": "Docling bundles PDF document conversion to",
"orig": "Docling bundles PDF document conversion to",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
},
{
"index": 1,
"rgba": {
"r": 0,
"g": 0,
"b": 0,
"a": 255
},
"rect": {
"r_x0": 100.55299576256091,
"r_y0": 523.3155494272656,
"r_x1": 124.91101654503161,
"r_y1": 523.3155494272656,
"r_x2": 124.91101654503161,
"r_y2": 89.12381765643227,
"r_x3": 100.55299576256091,
"r_y3": 89.12381765643227,
"coord_origin": "TOPLEFT"
},
"text": "JSON and Markdown in an easy self contained",
"orig": "JSON and Markdown in an easy self contained",
"text_direction": "left_to_right",
"confidence": 1.0,
"from_ocr": true
}
],
"children": []
},
"text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained"
}
]
}
}
]

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -1,6 +1,6 @@
import sys
from pathlib import Path
from typing import List
from typing import List, Tuple
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.datamodel.base_models import InputFormat
@ -56,33 +56,35 @@ def get_converter(ocr_options: OcrOptions):
def test_e2e_conversions():
pdf_paths = get_pdf_paths()
engines: List[OcrOptions] = [
EasyOcrOptions(),
TesseractOcrOptions(),
TesseractCliOcrOptions(),
EasyOcrOptions(force_full_page_ocr=True),
TesseractOcrOptions(force_full_page_ocr=True),
TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]),
TesseractCliOcrOptions(force_full_page_ocr=True),
TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]),
engines: List[Tuple[OcrOptions, bool]] = [
(EasyOcrOptions(), False),
(TesseractOcrOptions(), True),
(TesseractCliOcrOptions(), True),
(EasyOcrOptions(force_full_page_ocr=True), False),
(TesseractOcrOptions(force_full_page_ocr=True), True),
(TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
(TesseractCliOcrOptions(force_full_page_ocr=True), True),
(TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]), True),
]
# rapidocr is only available for Python >=3.6,<3.13
if sys.version_info < (3, 13):
engines.append(RapidOcrOptions())
engines.append(RapidOcrOptions(force_full_page_ocr=True))
engines.append((RapidOcrOptions(), False))
engines.append((RapidOcrOptions(force_full_page_ocr=True), False))
# only works on mac
if "darwin" == sys.platform:
engines.append(OcrMacOptions())
engines.append(OcrMacOptions(force_full_page_ocr=True))
engines.append((OcrMacOptions(), True))
engines.append((OcrMacOptions(force_full_page_ocr=True), True))
for ocr_options in engines:
for ocr_options, supports_rotation in engines:
print(
f"Converting with ocr_engine: {ocr_options.kind}, language: {ocr_options.lang}"
)
converter = get_converter(ocr_options=ocr_options)
for pdf_path in pdf_paths:
if not supports_rotation and "rotated" in pdf_path.name:
continue
print(f"converting {pdf_path}")
doc_result: ConversionResult = converter.convert(pdf_path)