diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py index 829419a..397bfc4 100644 --- a/docling/backend/asciidoc_backend.py +++ b/docling/backend/asciidoc_backend.py @@ -24,7 +24,6 @@ _log = logging.getLogger(__name__) class AsciiDocBackend(DeclarativeDocumentBackend): - def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index ae47888..66dd4a2 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -215,7 +215,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): label = DocItemLabel.CODE if len(text) == 0: return - doc.add_text(parent=self.parents[self.level], label=label, text=text) + doc.add_code(parent=self.parents[self.level], label=label, text=text) def handle_paragraph(self, element, idx, doc): """Handles paragraph tags (p).""" diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index 2bcc6d7..8171085 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -3,19 +3,22 @@ import re import warnings from io import BytesIO from pathlib import Path -from typing import Set, Union +from typing import List, Optional, Set, Union import marko import marko.ext import marko.ext.gfm import marko.inline from docling_core.types.doc import ( + DocItem, DocItemLabel, DoclingDocument, DocumentOrigin, GroupLabel, + NodeItem, TableCell, TableData, + TextItem, ) from marko import Markdown @@ -27,8 +30,7 @@ _log = logging.getLogger(__name__) class MarkdownDocumentBackend(DeclarativeDocumentBackend): - - def shorten_underscore_sequences(self, markdown_text, max_length=10): + def shorten_underscore_sequences(self, markdown_text: str, max_length: int = 10): # This regex will match any sequence of underscores pattern = r"_+" @@ -90,13 +92,13 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): ) from e return - def close_table(self, doc=None): + def close_table(self, doc: DoclingDocument): if self.in_table: _log.debug("=== TABLE START ===") for md_table_row in self.md_table_buffer: _log.debug(md_table_row) _log.debug("=== TABLE END ===") - tcells = [] + tcells: List[TableCell] = [] result_table = [] for n, md_table_row in enumerate(self.md_table_buffer): data = [] @@ -137,15 +139,19 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self.in_table = False self.md_table_buffer = [] # clean table markdown buffer # Initialize Docling TableData - data = TableData(num_rows=num_rows, num_cols=num_cols, table_cells=tcells) + table_data = TableData( + num_rows=num_rows, num_cols=num_cols, table_cells=tcells + ) # Populate for tcell in tcells: - data.table_cells.append(tcell) + table_data.table_cells.append(tcell) if len(tcells) > 0: - doc.add_table(data=data) + doc.add_table(data=table_data) return - def process_inline_text(self, parent_element, doc=None): + def process_inline_text( + self, parent_element: Optional[NodeItem], doc: DoclingDocument + ): # self.inline_text_buffer += str(text_in) txt = self.inline_text_buffer.strip() if len(txt) > 0: @@ -156,14 +162,20 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): ) self.inline_text_buffer = "" - def iterate_elements(self, element, depth=0, doc=None, parent_element=None): + def iterate_elements( + self, + element: marko.block.Element, + depth: int, + doc: DoclingDocument, + parent_element: Optional[NodeItem] = None, + ): # Iterates over all elements in the AST # Check for different element types and process relevant details if isinstance(element, marko.block.Heading): self.close_table(doc) self.process_inline_text(parent_element, doc) _log.debug( - f" - Heading level {element.level}, content: {element.children[0].children}" + f" - Heading level {element.level}, content: {element.children[0].children}" # type: ignore ) if element.level == 1: doc_label = DocItemLabel.TITLE @@ -172,10 +184,10 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): # Header could have arbitrary inclusion of bold, italic or emphasis, # hence we need to traverse the tree to get full text of a header - strings = [] + strings: List[str] = [] # Define a recursive function to traverse the tree - def traverse(node): + def traverse(node: marko.block.BlockElement): # Check if the node has a "children" attribute if hasattr(node, "children"): # If "children" is a list, continue traversal @@ -209,9 +221,13 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self.process_inline_text(parent_element, doc) _log.debug(" - List item") - snippet_text = str(element.children[0].children[0].children) + snippet_text = str(element.children[0].children[0].children) # type: ignore is_numbered = False - if parent_element.label == GroupLabel.ORDERED_LIST: + if ( + parent_element is not None + and isinstance(parent_element, DocItem) + and parent_element.label == GroupLabel.ORDERED_LIST + ): is_numbered = True doc.add_list_item( enumerated=is_numbered, parent=parent_element, text=snippet_text @@ -221,7 +237,14 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self.close_table(doc) self.process_inline_text(parent_element, doc) _log.debug(f" - Image with alt: {element.title}, url: {element.dest}") - doc.add_picture(parent=parent_element, caption=element.title) + + fig_caption: Optional[TextItem] = None + if element.title is not None and element.title != "": + fig_caption = doc.add_text( + label=DocItemLabel.CAPTION, text=element.title + ) + + doc.add_picture(parent=parent_element, caption=fig_caption) elif isinstance(element, marko.block.Paragraph): self.process_inline_text(parent_element, doc) @@ -252,27 +275,21 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self.process_inline_text(parent_element, doc) _log.debug(f" - Code Span: {element.children}") snippet_text = str(element.children).strip() - doc.add_text( - label=DocItemLabel.CODE, parent=parent_element, text=snippet_text - ) + doc.add_code(parent=parent_element, text=snippet_text) elif isinstance(element, marko.block.CodeBlock): self.close_table(doc) self.process_inline_text(parent_element, doc) _log.debug(f" - Code Block: {element.children}") - snippet_text = str(element.children[0].children).strip() - doc.add_text( - label=DocItemLabel.CODE, parent=parent_element, text=snippet_text - ) + snippet_text = str(element.children[0].children).strip() # type: ignore + doc.add_code(parent=parent_element, text=snippet_text) elif isinstance(element, marko.block.FencedCode): self.close_table(doc) self.process_inline_text(parent_element, doc) _log.debug(f" - Code Block: {element.children}") - snippet_text = str(element.children[0].children).strip() - doc.add_text( - label=DocItemLabel.CODE, parent=parent_element, text=snippet_text - ) + snippet_text = str(element.children[0].children).strip() # type: ignore + doc.add_code(parent=parent_element, text=snippet_text) elif isinstance(element, marko.inline.LineBreak): self.process_inline_text(parent_element, doc) diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py index 508b0e8..2d200d7 100644 --- a/docling/backend/msexcel_backend.py +++ b/docling/backend/msexcel_backend.py @@ -44,7 +44,6 @@ class ExcelTable(BaseModel): class MsExcelDocumentBackend(DeclarativeDocumentBackend): - def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index f59356e..f8148d5 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -26,7 +26,6 @@ _log = logging.getLogger(__name__) class MsWordDocumentBackend(DeclarativeDocumentBackend): - def __init__(self, in_doc: "InputDocument", path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) self.XML_KEY = ( diff --git a/docling/backend/pdf_backend.py b/docling/backend/pdf_backend.py index cd7a081..35c83b8 100644 --- a/docling/backend/pdf_backend.py +++ b/docling/backend/pdf_backend.py @@ -12,7 +12,6 @@ from docling.datamodel.document import InputDocument class PdfPageBackend(ABC): - @abstractmethod def get_text_in_rect(self, bbox: BoundingBox) -> str: pass @@ -45,7 +44,6 @@ class PdfPageBackend(ABC): class PdfDocumentBackend(PaginatedDocumentBackend): - def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]): super().__init__(in_doc, path_or_stream) diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index eeec6ba..efdf3b1 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -1,17 +1,11 @@ import logging import os -import warnings from enum import Enum from pathlib import Path -from typing import Annotated, Any, Dict, List, Literal, Optional, Tuple, Type, Union +from typing import Any, List, Literal, Optional, Union -from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator -from pydantic_settings import ( - BaseSettings, - PydanticBaseSettingsSource, - SettingsConfigDict, -) -from typing_extensions import deprecated +from pydantic import BaseModel, ConfigDict, Field, model_validator +from pydantic_settings import BaseSettings, SettingsConfigDict _log = logging.getLogger(__name__) @@ -225,6 +219,8 @@ class PdfPipelineOptions(PipelineOptions): artifacts_path: Optional[Union[Path, str]] = None do_table_structure: bool = True # True: perform table structure extraction do_ocr: bool = True # True: perform OCR, replace programmatic PDF text + do_code_enrichment: bool = False # True: perform code OCR + do_formula_enrichment: bool = False # True: perform formula OCR, return Latex code table_structure_options: TableStructureOptions = TableStructureOptions() ocr_options: Union[ diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 5a98382..08d728c 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import Any, Generic, Iterable, Optional -from docling_core.types.doc import DoclingDocument, NodeItem, TextItem +from docling_core.types.doc import BoundingBox, DoclingDocument, NodeItem, TextItem from typing_extensions import TypeVar from docling.datamodel.base_models import ItemAndImageEnrichmentElement, Page @@ -53,6 +53,7 @@ class BaseItemAndImageEnrichmentModel( ): images_scale: float + expansion_factor: float = 0.0 def prepare_element( self, conv_res: ConversionResult, element: NodeItem @@ -62,8 +63,22 @@ class BaseItemAndImageEnrichmentModel( assert isinstance(element, TextItem) element_prov = element.prov[0] + + bbox = element_prov.bbox + width = bbox.r - bbox.l + height = bbox.t - bbox.b + + # TODO: move to a utility in the BoundingBox class + expanded_bbox = BoundingBox( + l=bbox.l - width * self.expansion_factor, + t=bbox.t + height * self.expansion_factor, + r=bbox.r + width * self.expansion_factor, + b=bbox.b - height * self.expansion_factor, + coord_origin=bbox.coord_origin, + ) + page_ix = element_prov.page_no - 1 cropped_image = conv_res.pages[page_ix].get_image( - scale=self.images_scale, cropbox=element_prov.bbox + scale=self.images_scale, cropbox=expanded_bbox ) return ItemAndImageEnrichmentElement(item=element, image=cropped_image) diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py new file mode 100644 index 0000000..e4d5694 --- /dev/null +++ b/docling/models/code_formula_model.py @@ -0,0 +1,245 @@ +import re +from pathlib import Path +from typing import Iterable, List, Literal, Optional, Tuple, Union + +from docling_core.types.doc import ( + CodeItem, + DocItemLabel, + DoclingDocument, + NodeItem, + TextItem, +) +from docling_core.types.doc.labels import CodeLanguageLabel +from PIL import Image +from pydantic import BaseModel + +from docling.datamodel.base_models import ItemAndImageEnrichmentElement +from docling.datamodel.pipeline_options import AcceleratorOptions +from docling.models.base_model import BaseItemAndImageEnrichmentModel +from docling.utils.accelerator_utils import decide_device + + +class CodeFormulaModelOptions(BaseModel): + """ + Configuration options for the CodeFormulaModel. + + Attributes + ---------- + kind : str + Type of the model. Fixed value "code_formula". + do_code_enrichment : bool + True if code enrichment is enabled, False otherwise. + do_formula_enrichment : bool + True if formula enrichment is enabled, False otherwise. + """ + + kind: Literal["code_formula"] = "code_formula" + do_code_enrichment: bool = True + do_formula_enrichment: bool = True + + +class CodeFormulaModel(BaseItemAndImageEnrichmentModel): + """ + Model for processing and enriching documents with code and formula predictions. + + Attributes + ---------- + enabled : bool + True if the model is enabled, False otherwise. + options : CodeFormulaModelOptions + Configuration options for the CodeFormulaModel. + code_formula_model : CodeFormulaPredictor + The predictor model for code and formula processing. + + Methods + ------- + __init__(self, enabled, artifacts_path, accelerator_options, code_formula_options) + Initializes the CodeFormulaModel with the given configuration options. + is_processable(self, doc, element) + Determines if a given element in a document can be processed by the model. + __call__(self, doc, element_batch) + Processes the given batch of elements and enriches them with predictions. + """ + + images_scale = 1.66 # = 120 dpi, aligned with training data resolution + expansion_factor = 0.03 + + def __init__( + self, + enabled: bool, + artifacts_path: Optional[Union[Path, str]], + options: CodeFormulaModelOptions, + accelerator_options: AcceleratorOptions, + ): + """ + Initializes the CodeFormulaModel with the given configuration. + + Parameters + ---------- + enabled : bool + True if the model is enabled, False otherwise. + artifacts_path : Path + Path to the directory containing the model artifacts. + options : CodeFormulaModelOptions + Configuration options for the model. + accelerator_options : AcceleratorOptions + Options specifying the device and number of threads for acceleration. + """ + self.enabled = enabled + self.options = options + + if self.enabled: + device = decide_device(accelerator_options.device) + + from docling_ibm_models.code_formula_model.code_formula_predictor import ( + CodeFormulaPredictor, + ) + + if artifacts_path is None: + artifacts_path = self.download_models_hf() + else: + artifacts_path = Path(artifacts_path) + + self.code_formula_model = CodeFormulaPredictor( + artifacts_path=artifacts_path, + device=device, + num_threads=accelerator_options.num_threads, + ) + + @staticmethod + def download_models_hf( + local_dir: Optional[Path] = None, force: bool = False + ) -> Path: + from huggingface_hub import snapshot_download + from huggingface_hub.utils import disable_progress_bars + + disable_progress_bars() + download_path = snapshot_download( + repo_id="ds4sd/CodeFormula", + force_download=force, + local_dir=local_dir, + revision="v1.0.0", + ) + + return Path(download_path) + + def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool: + """ + Determines if a given element in a document can be processed by the model. + + Parameters + ---------- + doc : DoclingDocument + The document being processed. + element : NodeItem + The element within the document to check. + + Returns + ------- + bool + True if the element can be processed, False otherwise. + """ + return self.enabled and ( + (isinstance(element, CodeItem) and self.options.do_code_enrichment) + or ( + isinstance(element, TextItem) + and element.label == DocItemLabel.FORMULA + and self.options.do_formula_enrichment + ) + ) + + def _extract_code_language(self, input_string: str) -> Tuple[str, Optional[str]]: + """Extracts a programming language from the beginning of a string. + + This function checks if the input string starts with a pattern of the form + ``<_some_language_>``. If it does, it extracts the language string and returns + a tuple of (remainder, language). Otherwise, it returns the original string + and `None`. + + Args: + input_string (str): The input string, which may start with ``<_language_>``. + + Returns: + Tuple[str, Optional[str]]: + A tuple where: + - The first element is either: + - The remainder of the string (everything after ``<_language_>``), + if a match is found; or + - The original string, if no match is found. + - The second element is the extracted language if a match is found; + otherwise, `None`. + """ + pattern = r"^<_([^>]+)_>\s*(.*)" + match = re.match(pattern, input_string, flags=re.DOTALL) + if match: + language = str(match.group(1)) # the captured programming language + remainder = str(match.group(2)) # everything after the <_language_> + return remainder, language + else: + return input_string, None + + def _get_code_language_enum(self, value: Optional[str]) -> CodeLanguageLabel: + """ + Converts a string to a corresponding `CodeLanguageLabel` enum member. + + If the provided string does not match any value in `CodeLanguageLabel`, + it defaults to `CodeLanguageLabel.UNKNOWN`. + + Args: + value (Optional[str]): The string representation of the code language or None. + + Returns: + CodeLanguageLabel: The corresponding enum member if the value is valid, + otherwise `CodeLanguageLabel.UNKNOWN`. + """ + if not isinstance(value, str): + return CodeLanguageLabel.UNKNOWN + + try: + return CodeLanguageLabel(value) + except ValueError: + return CodeLanguageLabel.UNKNOWN + + def __call__( + self, + doc: DoclingDocument, + element_batch: Iterable[ItemAndImageEnrichmentElement], + ) -> Iterable[NodeItem]: + """ + Processes the given batch of elements and enriches them with predictions. + + Parameters + ---------- + doc : DoclingDocument + The document being processed. + element_batch : Iterable[ItemAndImageEnrichmentElement] + A batch of elements to be processed. + + Returns + ------- + Iterable[Any] + An iterable of enriched elements. + """ + if not self.enabled: + for element in element_batch: + yield element.item + return + + labels: List[str] = [] + images: List[Image.Image] = [] + elements: List[TextItem] = [] + for el in element_batch: + assert isinstance(el.item, TextItem) + elements.append(el.item) + labels.append(el.item.label) + images.append(el.image) + + outputs = self.code_formula_model.predict(images, labels) + + for item, output in zip(elements, outputs): + if isinstance(item, CodeItem): + output, code_language = self._extract_code_language(output) + item.code_language = self._get_code_language_enum(code_language) + item.text = output + + yield item diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index c1b7dab..9fa0ecb 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -40,7 +40,7 @@ class LayoutModel(BasePageModel): DocItemLabel.PAGE_FOOTER, DocItemLabel.CODE, DocItemLabel.LIST_ITEM, - # "Formula", + DocItemLabel.FORMULA, ] PAGE_HEADER_LABELS = [DocItemLabel.PAGE_HEADER, DocItemLabel.PAGE_FOOTER] diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py index 6239dbf..4acf8c9 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/page_assemble_model.py @@ -135,31 +135,6 @@ class PageAssembleModel(BasePageModel): ) elements.append(fig) body.append(fig) - elif cluster.label == LayoutModel.FORMULA_LABEL: - equation = None - if page.predictions.equations_prediction: - equation = page.predictions.equations_prediction.equation_map.get( - cluster.id, None - ) - if ( - not equation - ): # fallback: add empty formula, if it isn't present - text = self.sanitize_text( - [ - cell.text.replace("\x02", "-").strip() - for cell in cluster.cells - if len(cell.text.strip()) > 0 - ] - ) - equation = TextElement( - label=cluster.label, - id=cluster.id, - cluster=cluster, - page_no=page.page_no, - text=text, - ) - elements.append(equation) - body.append(equation) elif cluster.label in LayoutModel.CONTAINER_LABELS: container_el = ContainerElement( label=cluster.label, diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 16e1629..3d5c800 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -20,7 +20,6 @@ _log = logging.getLogger(__name__) class TesseractOcrCliModel(BaseOcrModel): - def __init__(self, enabled: bool, options: TesseractCliOcrOptions): super().__init__(enabled=enabled, options=options) self.options: TesseractCliOcrOptions diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 034e6d4..75a08e7 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -3,7 +3,7 @@ import logging import time import traceback from abc import ABC, abstractmethod -from typing import Callable, Iterable, List +from typing import Any, Callable, Iterable, List from docling_core.types.doc import DoclingDocument, NodeItem @@ -18,7 +18,7 @@ from docling.datamodel.base_models import ( from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.pipeline_options import PipelineOptions from docling.datamodel.settings import settings -from docling.models.base_model import BaseEnrichmentModel +from docling.models.base_model import GenericEnrichmentModel from docling.utils.profiling import ProfilingScope, TimeRecorder from docling.utils.utils import chunkify @@ -30,7 +30,7 @@ class BasePipeline(ABC): self.pipeline_options = pipeline_options self.keep_images = False self.build_pipe: List[Callable] = [] - self.enrichment_pipe: List[BaseEnrichmentModel] = [] + self.enrichment_pipe: List[GenericEnrichmentModel[Any]] = [] def execute(self, in_doc: InputDocument, raises_on_error: bool) -> ConversionResult: conv_res = ConversionResult(input=in_doc) @@ -66,7 +66,7 @@ class BasePipeline(ABC): def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult: def _prepare_elements( - conv_res: ConversionResult, model: BaseEnrichmentModel + conv_res: ConversionResult, model: GenericEnrichmentModel[Any] ) -> Iterable[NodeItem]: for doc_element, _level in conv_res.document.iterate_items(): prepared_element = model.prepare_element( diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 758f4e9..97bcc6b 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -1,7 +1,7 @@ import logging import sys from pathlib import Path -from typing import Iterable, Optional +from typing import Optional from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem @@ -17,8 +17,8 @@ from docling.datamodel.pipeline_options import ( TesseractCliOcrOptions, TesseractOcrOptions, ) -from docling.models.base_model import BasePageModel from docling.models.base_ocr_model import BaseOcrModel +from docling.models.code_formula_model import CodeFormulaModel, CodeFormulaModelOptions from docling.models.ds_glm_model import GlmModel, GlmOptions from docling.models.easyocr_model import EasyOcrModel from docling.models.layout_model import LayoutModel @@ -93,8 +93,25 @@ class StandardPdfPipeline(PaginatedPipeline): self.enrichment_pipe = [ # Other models working on `NodeItem` elements in the DoclingDocument + # Code Formula Enrichment Model + CodeFormulaModel( + enabled=pipeline_options.do_code_enrichment + or pipeline_options.do_formula_enrichment, + artifacts_path=pipeline_options.artifacts_path, + options=CodeFormulaModelOptions( + do_code_enrichment=pipeline_options.do_code_enrichment, + do_formula_enrichment=pipeline_options.do_formula_enrichment, + ), + accelerator_options=pipeline_options.accelerator_options, + ), ] + if ( + self.pipeline_options.do_formula_enrichment + or self.pipeline_options.do_code_enrichment + ): + self.keep_backend = True + @staticmethod def download_models_hf( local_dir: Optional[Path] = None, force: bool = False diff --git a/docling/utils/glm_utils.py b/docling/utils/glm_utils.py index 1c3b3f6..da29cdd 100644 --- a/docling/utils/glm_utils.py +++ b/docling/utils/glm_utils.py @@ -270,7 +270,6 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument: container_el = doc.add_group(label=group_label) _add_child_elements(container_el, doc, obj, pelem) - elif "text" in obj: text = obj["text"][span_i:span_j] @@ -304,6 +303,10 @@ def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument: current_list = None doc.add_heading(text=text, prov=prov) + elif label == DocItemLabel.CODE: + current_list = None + + doc.add_code(text=text, prov=prov) else: current_list = None diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py index 7ad06e4..81009fe 100644 --- a/docs/examples/develop_picture_enrichment.py +++ b/docs/examples/develop_picture_enrichment.py @@ -22,7 +22,6 @@ class ExamplePictureClassifierPipelineOptions(PdfPipelineOptions): class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel): - def __init__(self, enabled: bool): self.enabled = enabled @@ -54,7 +53,6 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel): class ExamplePictureClassifierPipeline(StandardPdfPipeline): - def __init__(self, pipeline_options: ExamplePictureClassifierPipelineOptions): super().__init__(pipeline_options) self.pipeline_options: ExamplePictureClassifierPipeline diff --git a/poetry.lock b/poetry.lock index 64226c3..73fc85d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -231,21 +231,6 @@ docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphi tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] -[[package]] -name = "autoflake" -version = "2.3.1" -description = "Removes unused imports and unused variables" -optional = false -python-versions = ">=3.8" -files = [ - {file = "autoflake-2.3.1-py3-none-any.whl", hash = "sha256:3ae7495db9084b7b32818b4140e6dc4fc280b712fb414f5b8fe57b0a8e85a840"}, - {file = "autoflake-2.3.1.tar.gz", hash = "sha256:c98b75dc5b0a86459c4f01a1d32ac7eb4338ec4317a4469515ff1e687ecd909e"}, -] - -[package.dependencies] -pyflakes = ">=3.0.0" -tomli = {version = ">=2.0.1", markers = "python_version < \"3.11\""} - [[package]] name = "autopep8" version = "2.2.0" @@ -876,13 +861,13 @@ files = [ [[package]] name = "docling-core" -version = "2.14.0" +version = "2.15.1" description = "A python library to define and validate data types in Docling." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_core-2.14.0-py3-none-any.whl", hash = "sha256:05a7b89872260dcdba2b0fbcc3a4619aed4846f58f155d33a10b41b23eea5188"}, - {file = "docling_core-2.14.0.tar.gz", hash = "sha256:0eb6a52e05f2a06e1777b0533d655a87b54a1a5d374b957beb244c8940aed7da"}, + {file = "docling_core-2.15.1-py3-none-any.whl", hash = "sha256:33152604e1f14d5caccbef099c73163c3f211d0b4d92403d262c308633cc0451"}, + {file = "docling_core-2.15.1.tar.gz", hash = "sha256:588d941b5bfc393a79e779ab64819c60763e7f182ec5221ee37da4be91dd802f"}, ] [package.dependencies] @@ -903,13 +888,13 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"] [[package]] name = "docling-ibm-models" -version = "3.1.2" +version = "3.2.1" description = "This package contains the AI models used by the Docling PDF conversion package" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_ibm_models-3.1.2-py3-none-any.whl", hash = "sha256:c5d2fa83db08ec538bb77e3d5d79c9ccef7b6873aab19ddcf5bb5e9801bf4a03"}, - {file = "docling_ibm_models-3.1.2.tar.gz", hash = "sha256:68c8b8f1cb87a8d8c5c6d6fe2c86679d65a09d354ec413ed13e9493ee0cd3794"}, + {file = "docling_ibm_models-3.2.1-py3-none-any.whl", hash = "sha256:55bca5673381cc5862f4de584345020d071414c46bc1b9f6436d674e3610ec97"}, + {file = "docling_ibm_models-3.2.1.tar.gz", hash = "sha256:abd1bdc58f00600065eedbfbd34876704d5004cd20884a2c0a61ca2ee5a927dd"}, ] [package.dependencies] @@ -932,43 +917,42 @@ transformers = [ [[package]] name = "docling-parse" -version = "3.1.0" +version = "3.1.1" description = "Simple package to extract text with coordinates from programmatic PDFs" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_parse-3.1.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:0bfd425f2a66a23e7dce51aa461984b69cbb6cf2d7e948c1388314b5a6089045"}, - {file = "docling_parse-3.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:d1c8c689915f8d768fc850fd3e98d9e6e3d50fb070cab84fddac2465c3bc78bc"}, - {file = "docling_parse-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42c24570fcc9e820b86de4996e8c8fa76d490ae8b16c9dc264a2d60f4aaf5801"}, - {file = "docling_parse-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9e05e744f6ffbfe6e65b53a0a6bdc440814b2581edf1dbab26aca38b44514a5"}, - {file = "docling_parse-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f59507c9a9672032b3029df7318ab017bf66986f95e00edc5d2e4c5c6ec5fa7e"}, - {file = "docling_parse-3.1.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:428098cc5f8a449101099f77e53862a1e8db58e27a028a2a9ce397be26d4788f"}, - {file = "docling_parse-3.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:5f877b9d5ac46828327e5bd94da7b2f54da900d4a729c52533df904dd3463f85"}, - {file = "docling_parse-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c10e587ca2c55522f7cf35b8d9983f1b2eeef6f67b168da157ad4fc1e3ad5ba2"}, - {file = "docling_parse-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2da249c455758f422d47379202dc599a3f35fe811ed03dc019fd42548d8cd2b"}, - {file = "docling_parse-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:2aeaa1a34f92208ed63784ee04f69644f32974b520d957db753256c6d0ccf2b6"}, - {file = "docling_parse-3.1.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:6997eb40f7d04e4438e3a5218df23c07b260cbece3f150cdd428bf05f927a389"}, - {file = "docling_parse-3.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1307169244cd145c59c2c8137bb9c0c132dc2bb0abf890875e8ca22f09fa4f99"}, - {file = "docling_parse-3.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd1d4460303d1d7162c0f45f141bef6bfb39df235c7b7e9ba842a921c74e3176"}, - {file = "docling_parse-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:970a44d2d79a007957e91b2501c0572e2d0f4623c3f3685ba7a8b09bf39df1b1"}, - {file = "docling_parse-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:b9a80f4b4161c57797f061fa111ce797eeacf52bce9bebba7919a8ea7fcc26ac"}, - {file = "docling_parse-3.1.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:993e27c953bb0fa53100aea694442c0147f523044c02fd7c193b68ac9070d507"}, - {file = "docling_parse-3.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:0a19d828c466c653ffdaff1d7da2cd7f4aab212360c477629463da8f0e150c05"}, - {file = "docling_parse-3.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd868b8ee626b9e74c8d2bb3ad8e7f036dd6839250bdebe7a4c0a1657aaa8dbd"}, - {file = "docling_parse-3.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:13c1627a7c16527bf2003d764a5f8aa23337d8c56085d3d4717e65d2c940b242"}, - {file = "docling_parse-3.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:67175719913fa3adb6476a0753383b4a0d156e286927ae641085ff99f85320c4"}, - {file = "docling_parse-3.1.0-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:ada2b7fc587fd31d1a0b565957596d9b057b32730e9cd0bd390b38cf1f5192bd"}, - {file = "docling_parse-3.1.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:c70ef7cedf37a87fe26ef3df5870523c9471b2144170ee3d7756c2cd4c3eb687"}, - {file = "docling_parse-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:181ee70a617ec08dbdd1bbbf613993be86e0da8ea8c5024704eb294e70f8b253"}, - {file = "docling_parse-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ed50ed868ac18b805fc7e8885d3f5504c0d28eec1c40e22691f7aac0edf6374"}, - {file = "docling_parse-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:55e02583f7214e4d0d46a5a687abaf5a080dc7c6a7015888e23d83549702434f"}, - {file = "docling_parse-3.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d83d07744e52f1b4cfaf50f21c47b2d0df6d285a7e3d4297e09ba159b36b9277"}, - {file = "docling_parse-3.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9f70d59e05c53b5c6538eb480dce5ba9c02f349a3ccc99d20df6303a56cd3ded"}, - {file = "docling_parse-3.1.0.tar.gz", hash = "sha256:4a159222c7c4b4b7932330168405ef2b887bc5e3d217c9dd25a9d9b51f9187d3"}, + {file = "docling_parse-3.1.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:cccf1b7912ece508f75bc004dff392298fc956f33c62f3a48db6c0a7976d808a"}, + {file = "docling_parse-3.1.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3537f3bbd2152a3f2c25142ac93b9db08e68eca923863dca272a0f588739855d"}, + {file = "docling_parse-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dffee503cf6be3343df2d9421067585a88543ed5c94e39662a3cfc5cd2b794a"}, + {file = "docling_parse-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c856f0dbe8f10296442f749109d5d5dc86f10151a0e51e8629b32d053d0e61c2"}, + {file = "docling_parse-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:5693ef8cba6096d8ed1039f61a663ea74bac711d06616b51254c495ebb3eb53d"}, + {file = "docling_parse-3.1.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:8e586bcd24d7e3ba26ebdace4cb15d70498fb4656fc9f6f20f24b007de0628d6"}, + {file = "docling_parse-3.1.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1e8137020ed9bff26eb70dbbdb42c62f3e87c81001e3ecd41e39b3ec3631d7bf"}, + {file = "docling_parse-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bec98497626202a6fa7e2a715814414131b53b32cd2999e540edf87a60e45ef5"}, + {file = "docling_parse-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:537e0eec387a9cc3e35492752efc561982b3cf02b3d571ca46c4a0af3a884068"}, + {file = "docling_parse-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:ca3d45a0e9cd41c5e6e0002eaa1a3478bc065b58dc7d38a114eb5ad37f762934"}, + {file = "docling_parse-3.1.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:008d751f4fdd82a3cbe3e8d4abaa4d5cf0d0cb35d16334c5dfc22a62001c780b"}, + {file = "docling_parse-3.1.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:a06a0e4b403387e9c4e79d388aa63ace75d1aa855018238634ec8ce262369ffa"}, + {file = "docling_parse-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b802ae9c2464fc0354721d0ef3c73f573c202fa1995276afceaf5882bb894583"}, + {file = "docling_parse-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dc796b738e4ba3663084ee9fa4fe749e8aa27154bf459a3531e5a5b9c774b6b"}, + {file = "docling_parse-3.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:dbad418bedc7706c230ae8212cd08a41400762104be3df512ffe05d0f468d6e2"}, + {file = "docling_parse-3.1.1-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:b32f46810f7c05de3e1fd13c2bbe58291710b90777baefefd8ed04118be319db"}, + {file = "docling_parse-3.1.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:10ffbfe70a0eda2cac42a8fb2ebbe0adafdcfeb173ecaa0e7e0e7769cc020449"}, + {file = "docling_parse-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4389f552297c0798bfc9b4b0116461d7e154340311b143264e9e48808f19884"}, + {file = "docling_parse-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a716412318f5136fde397925a06b3d1cc3fce33f060175574d09576cbfc901f1"}, + {file = "docling_parse-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:7ff36910971bc015270c4aaae5f01d783970a0af840ca84070a41564759048c5"}, + {file = "docling_parse-3.1.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:9711ed84828bfc35b8cd02aedbf3a9a264eaaf567c8168c8c1cca5eb239490eb"}, + {file = "docling_parse-3.1.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:fae1a11fd48faaf2961332d75f507aab452d3fbe88085a46cdfbb1efbc3b5c0c"}, + {file = "docling_parse-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ecfdcf69eb93dbe1e6798b1516e657aab6b3b3435d6d161078108ef6f2d8edb"}, + {file = "docling_parse-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba63a538e329f66666732a24d5ce4871eb19646833012e5b2c500ccdda29d959"}, + {file = "docling_parse-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:22229c00ae9a34d77840e9352fd02d05dbbd90cfe1fdac9319d7a653bd7ba060"}, + {file = "docling_parse-3.1.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:97bbd6b45681c643d1ca2917d4c6813735a3527ee2af2823ebdf3882545539bf"}, + {file = "docling_parse-3.1.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:9637c9676d6ba652362673f57d8f8af9ea35c844ca25116e61ecd5c138ceb1a7"}, + {file = "docling_parse-3.1.1.tar.gz", hash = "sha256:fb62c85132d35edd91cee5c093b9e45d981ca7fa8ba0c560f0c3ce56993e4f8e"}, ] [package.dependencies] -autoflake = ">=2.3.1,<3.0.0" docling-core = ">=2.14.0,<3.0.0" pillow = ">=10.4.0,<11.0.0" pydantic = ">=2.10.5,<3.0.0" @@ -1090,18 +1074,18 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] name = "filelock" -version = "3.16.1" +version = "3.17.0" description = "A platform independent file lock." optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, - {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, + {file = "filelock-3.17.0-py3-none-any.whl", hash = "sha256:533dc2f7ba78dc2f0f531fc6c4940addf7b70a481e269a5a3b93be94ffbe8338"}, + {file = "filelock-3.17.0.tar.gz", hash = "sha256:ee4e77401ef576ebb38cd7f13b9b28893194acc20a8e68e18730ba9c0e54660e"}, ] [package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.1.3)", "sphinx-autodoc-typehints (>=3)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.10)", "diff-cover (>=9.2.1)", "pytest (>=8.3.4)", "pytest-asyncio (>=0.25.2)", "pytest-cov (>=6)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.28.1)"] typing = ["typing-extensions (>=4.12.2)"] [[package]] @@ -1150,13 +1134,13 @@ dev = ["pyTest", "pyTest-cov"] [[package]] name = "flatbuffers" -version = "24.12.23" +version = "25.1.21" description = "The FlatBuffers serialization format for Python" optional = true python-versions = "*" files = [ - {file = "flatbuffers-24.12.23-py2.py3-none-any.whl", hash = "sha256:c418e0d48890f4142b92fd3e343e73a48f194e1f80075ddcc5793779b3585444"}, - {file = "flatbuffers-24.12.23.tar.gz", hash = "sha256:2910b0bc6ae9b6db78dd2b18d0b7a0709ba240fb5585f286a3a2b30785c22dac"}, + {file = "flatbuffers-25.1.21-py2.py3-none-any.whl", hash = "sha256:0e9736098ba8f4e48246a0640390f4992c0b1a734e7322a9463d5c3eea00558b"}, + {file = "flatbuffers-25.1.21.tar.gz", hash = "sha256:e24a34dcd9fb4e0ea8cc0fc8ef9c5cd61c9d21527a6d536967587a37a4ff9676"}, ] [[package]] @@ -1552,13 +1536,13 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.5" +version = "2.6.6" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.5-py2.py3-none-any.whl", hash = "sha256:14181a47091eb75b337af4c23078c9d09225cd4c48929f521f3bf16b09d02566"}, - {file = "identify-2.6.5.tar.gz", hash = "sha256:c10b33f250e5bba374fae86fb57f3adcebf1161bce7cdf92031915fd480c13bc"}, + {file = "identify-2.6.6-py2.py3-none-any.whl", hash = "sha256:cbd1810bce79f8b671ecb20f53ee0ae8e86ae84b557de31d89709dc2a48ba881"}, + {file = "identify-2.6.6.tar.gz", hash = "sha256:7bec12768ed44ea4761efb47806f0a41f86e7c0a5fdf5950d4648c90eca7e251"}, ] [package.extras] @@ -1580,13 +1564,13 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2 [[package]] name = "imageio" -version = "2.36.1" +version = "2.37.0" description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats." optional = false python-versions = ">=3.9" files = [ - {file = "imageio-2.36.1-py3-none-any.whl", hash = "sha256:20abd2cae58e55ca1af8a8dcf43293336a59adf0391f1917bf8518633cfc2cdf"}, - {file = "imageio-2.36.1.tar.gz", hash = "sha256:e4e1d231f47f9a9e16100b0f7ce1a86e8856fb4d1c0fa2c4365a316f1746be62"}, + {file = "imageio-2.37.0-py3-none-any.whl", hash = "sha256:11efa15b87bc7871b61590326b2d635439acc321cf7f8ce996f812543ce10eed"}, + {file = "imageio-2.37.0.tar.gz", hash = "sha256:71b57b3669666272c818497aebba2b4c5f20d5b37c81720e5e1a56d59c492996"}, ] [package.dependencies] @@ -1613,13 +1597,13 @@ tifffile = ["tifffile"] [[package]] name = "importlib-metadata" -version = "8.5.0" +version = "8.6.1" description = "Read metadata from Python packages" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, - {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, + {file = "importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e"}, + {file = "importlib_metadata-8.6.1.tar.gz", hash = "sha256:310b41d755445d74569f993ccfc22838295d9fe005425094fad953d7f15c8580"}, ] [package.dependencies] @@ -1631,7 +1615,7 @@ cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +test = ["flufl.flake8", "importlib_resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] type = ["pytest-mypy"] [[package]] @@ -2726,13 +2710,13 @@ pygments = ">2.12.0" [[package]] name = "mkdocs-material" -version = "9.5.49" +version = "9.5.50" description = "Documentation that simply works" optional = false python-versions = ">=3.8" files = [ - {file = "mkdocs_material-9.5.49-py3-none-any.whl", hash = "sha256:c3c2d8176b18198435d3a3e119011922f3e11424074645c24019c2dcf08a360e"}, - {file = "mkdocs_material-9.5.49.tar.gz", hash = "sha256:3671bb282b4f53a1c72e08adbe04d2481a98f85fed392530051f80ff94a9621d"}, + {file = "mkdocs_material-9.5.50-py3-none-any.whl", hash = "sha256:f24100f234741f4d423a9d672a909d859668a4f404796be3cf035f10d6050385"}, + {file = "mkdocs_material-9.5.50.tar.gz", hash = "sha256:ae5fe16f3d7c9ccd05bb6916a7da7420cf99a9ce5e33debd9d40403a090d5825"}, ] [package.dependencies] @@ -2749,7 +2733,7 @@ regex = ">=2022.4" requests = ">=2.26,<3.0" [package.extras] -git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] +git = ["mkdocs-git-committers-plugin-2 (>=1.1,<3)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"] imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"] recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"] @@ -3364,66 +3348,66 @@ files = [ [[package]] name = "numpy" -version = "2.2.1" +version = "2.2.2" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.10" files = [ - {file = "numpy-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5edb4e4caf751c1518e6a26a83501fda79bff41cc59dac48d70e6d65d4ec4440"}, - {file = "numpy-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aa3017c40d513ccac9621a2364f939d39e550c542eb2a894b4c8da92b38896ab"}, - {file = "numpy-2.2.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:61048b4a49b1c93fe13426e04e04fdf5a03f456616f6e98c7576144677598675"}, - {file = "numpy-2.2.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:7671dc19c7019103ca44e8d94917eba8534c76133523ca8406822efdd19c9308"}, - {file = "numpy-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4250888bcb96617e00bfa28ac24850a83c9f3a16db471eca2ee1f1714df0f957"}, - {file = "numpy-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a7746f235c47abc72b102d3bce9977714c2444bdfaea7888d241b4c4bb6a78bf"}, - {file = "numpy-2.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:059e6a747ae84fce488c3ee397cee7e5f905fd1bda5fb18c66bc41807ff119b2"}, - {file = "numpy-2.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f62aa6ee4eb43b024b0e5a01cf65a0bb078ef8c395e8713c6e8a12a697144528"}, - {file = "numpy-2.2.1-cp310-cp310-win32.whl", hash = "sha256:48fd472630715e1c1c89bf1feab55c29098cb403cc184b4859f9c86d4fcb6a95"}, - {file = "numpy-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:b541032178a718c165a49638d28272b771053f628382d5e9d1c93df23ff58dbf"}, - {file = "numpy-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:40f9e544c1c56ba8f1cf7686a8c9b5bb249e665d40d626a23899ba6d5d9e1484"}, - {file = "numpy-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f9b57eaa3b0cd8db52049ed0330747b0364e899e8a606a624813452b8203d5f7"}, - {file = "numpy-2.2.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bc8a37ad5b22c08e2dbd27df2b3ef7e5c0864235805b1e718a235bcb200cf1cb"}, - {file = "numpy-2.2.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9036d6365d13b6cbe8f27a0eaf73ddcc070cae584e5ff94bb45e3e9d729feab5"}, - {file = "numpy-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51faf345324db860b515d3f364eaa93d0e0551a88d6218a7d61286554d190d73"}, - {file = "numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38efc1e56b73cc9b182fe55e56e63b044dd26a72128fd2fbd502f75555d92591"}, - {file = "numpy-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:31b89fa67a8042e96715c68e071a1200c4e172f93b0fbe01a14c0ff3ff820fc8"}, - {file = "numpy-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4c86e2a209199ead7ee0af65e1d9992d1dce7e1f63c4b9a616500f93820658d0"}, - {file = "numpy-2.2.1-cp311-cp311-win32.whl", hash = "sha256:b34d87e8a3090ea626003f87f9392b3929a7bbf4104a05b6667348b6bd4bf1cd"}, - {file = "numpy-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:360137f8fb1b753c5cde3ac388597ad680eccbbbb3865ab65efea062c4a1fd16"}, - {file = "numpy-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:694f9e921a0c8f252980e85bce61ebbd07ed2b7d4fa72d0e4246f2f8aa6642ab"}, - {file = "numpy-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3683a8d166f2692664262fd4900f207791d005fb088d7fdb973cc8d663626faa"}, - {file = "numpy-2.2.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:780077d95eafc2ccc3ced969db22377b3864e5b9a0ea5eb347cc93b3ea900315"}, - {file = "numpy-2.2.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:55ba24ebe208344aa7a00e4482f65742969a039c2acfcb910bc6fcd776eb4355"}, - {file = "numpy-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b1d07b53b78bf84a96898c1bc139ad7f10fda7423f5fd158fd0f47ec5e01ac7"}, - {file = "numpy-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5062dc1a4e32a10dc2b8b13cedd58988261416e811c1dc4dbdea4f57eea61b0d"}, - {file = "numpy-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fce4f615f8ca31b2e61aa0eb5865a21e14f5629515c9151850aa936c02a1ee51"}, - {file = "numpy-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:67d4cda6fa6ffa073b08c8372aa5fa767ceb10c9a0587c707505a6d426f4e046"}, - {file = "numpy-2.2.1-cp312-cp312-win32.whl", hash = "sha256:32cb94448be47c500d2c7a95f93e2f21a01f1fd05dd2beea1ccd049bb6001cd2"}, - {file = "numpy-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:ba5511d8f31c033a5fcbda22dd5c813630af98c70b2661f2d2c654ae3cdfcfc8"}, - {file = "numpy-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f1d09e520217618e76396377c81fba6f290d5f926f50c35f3a5f72b01a0da780"}, - {file = "numpy-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3ecc47cd7f6ea0336042be87d9e7da378e5c7e9b3c8ad0f7c966f714fc10d821"}, - {file = "numpy-2.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f419290bc8968a46c4933158c91a0012b7a99bb2e465d5ef5293879742f8797e"}, - {file = "numpy-2.2.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5b6c390bfaef8c45a260554888966618328d30e72173697e5cabe6b285fb2348"}, - {file = "numpy-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:526fc406ab991a340744aad7e25251dd47a6720a685fa3331e5c59fef5282a59"}, - {file = "numpy-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f74e6fdeb9a265624ec3a3918430205dff1df7e95a230779746a6af78bc615af"}, - {file = "numpy-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:53c09385ff0b72ba79d8715683c1168c12e0b6e84fb0372e97553d1ea91efe51"}, - {file = "numpy-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3eac17d9ec51be534685ba877b6ab5edc3ab7ec95c8f163e5d7b39859524716"}, - {file = "numpy-2.2.1-cp313-cp313-win32.whl", hash = "sha256:9ad014faa93dbb52c80d8f4d3dcf855865c876c9660cb9bd7553843dd03a4b1e"}, - {file = "numpy-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:164a829b6aacf79ca47ba4814b130c4020b202522a93d7bff2202bfb33b61c60"}, - {file = "numpy-2.2.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4dfda918a13cc4f81e9118dea249e192ab167a0bb1966272d5503e39234d694e"}, - {file = "numpy-2.2.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:733585f9f4b62e9b3528dd1070ec4f52b8acf64215b60a845fa13ebd73cd0712"}, - {file = "numpy-2.2.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:89b16a18e7bba224ce5114db863e7029803c179979e1af6ad6a6b11f70545008"}, - {file = "numpy-2.2.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:676f4eebf6b2d430300f1f4f4c2461685f8269f94c89698d832cdf9277f30b84"}, - {file = "numpy-2.2.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27f5cdf9f493b35f7e41e8368e7d7b4bbafaf9660cba53fb21d2cd174ec09631"}, - {file = "numpy-2.2.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1ad395cf254c4fbb5b2132fee391f361a6e8c1adbd28f2cd8e79308a615fe9d"}, - {file = "numpy-2.2.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:08ef779aed40dbc52729d6ffe7dd51df85796a702afbf68a4f4e41fafdc8bda5"}, - {file = "numpy-2.2.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:26c9c4382b19fcfbbed3238a14abf7ff223890ea1936b8890f058e7ba35e8d71"}, - {file = "numpy-2.2.1-cp313-cp313t-win32.whl", hash = "sha256:93cf4e045bae74c90ca833cba583c14b62cb4ba2cba0abd2b141ab52548247e2"}, - {file = "numpy-2.2.1-cp313-cp313t-win_amd64.whl", hash = "sha256:bff7d8ec20f5f42607599f9994770fa65d76edca264a87b5e4ea5629bce12268"}, - {file = "numpy-2.2.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7ba9cc93a91d86365a5d270dee221fdc04fb68d7478e6bf6af650de78a8339e3"}, - {file = "numpy-2.2.1-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:3d03883435a19794e41f147612a77a8f56d4e52822337844fff3d4040a142964"}, - {file = "numpy-2.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4511d9e6071452b944207c8ce46ad2f897307910b402ea5fa975da32e0102800"}, - {file = "numpy-2.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5c5cc0cbabe9452038ed984d05ac87910f89370b9242371bd9079cb4af61811e"}, - {file = "numpy-2.2.1.tar.gz", hash = "sha256:45681fd7128c8ad1c379f0ca0776a8b0c6583d2f69889ddac01559dfe4390918"}, + {file = "numpy-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7079129b64cb78bdc8d611d1fd7e8002c0a2565da6a47c4df8062349fee90e3e"}, + {file = "numpy-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ec6c689c61df613b783aeb21f945c4cbe6c51c28cb70aae8430577ab39f163e"}, + {file = "numpy-2.2.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:40c7ff5da22cd391944a28c6a9c638a5eef77fcf71d6e3a79e1d9d9e82752715"}, + {file = "numpy-2.2.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:995f9e8181723852ca458e22de5d9b7d3ba4da3f11cc1cb113f093b271d7965a"}, + {file = "numpy-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b78ea78450fd96a498f50ee096f69c75379af5138f7881a51355ab0e11286c97"}, + {file = "numpy-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fbe72d347fbc59f94124125e73fc4976a06927ebc503ec5afbfb35f193cd957"}, + {file = "numpy-2.2.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8e6da5cffbbe571f93588f562ed130ea63ee206d12851b60819512dd3e1ba50d"}, + {file = "numpy-2.2.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:09d6a2032faf25e8d0cadde7fd6145118ac55d2740132c1d845f98721b5ebcfd"}, + {file = "numpy-2.2.2-cp310-cp310-win32.whl", hash = "sha256:159ff6ee4c4a36a23fe01b7c3d07bd8c14cc433d9720f977fcd52c13c0098160"}, + {file = "numpy-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:64bd6e1762cd7f0986a740fee4dff927b9ec2c5e4d9a28d056eb17d332158014"}, + {file = "numpy-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:642199e98af1bd2b6aeb8ecf726972d238c9877b0f6e8221ee5ab945ec8a2189"}, + {file = "numpy-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6d9fc9d812c81e6168b6d405bf00b8d6739a7f72ef22a9214c4241e0dc70b323"}, + {file = "numpy-2.2.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:c7d1fd447e33ee20c1f33f2c8e6634211124a9aabde3c617687d8b739aa69eac"}, + {file = "numpy-2.2.2-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:451e854cfae0febe723077bd0cf0a4302a5d84ff25f0bfece8f29206c7bed02e"}, + {file = "numpy-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd249bc894af67cbd8bad2c22e7cbcd46cf87ddfca1f1289d1e7e54868cc785c"}, + {file = "numpy-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02935e2c3c0c6cbe9c7955a8efa8908dd4221d7755644c59d1bba28b94fd334f"}, + {file = "numpy-2.2.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:a972cec723e0563aa0823ee2ab1df0cb196ed0778f173b381c871a03719d4826"}, + {file = "numpy-2.2.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:d6d6a0910c3b4368d89dde073e630882cdb266755565155bc33520283b2d9df8"}, + {file = "numpy-2.2.2-cp311-cp311-win32.whl", hash = "sha256:860fd59990c37c3ef913c3ae390b3929d005243acca1a86facb0773e2d8d9e50"}, + {file = "numpy-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:da1eeb460ecce8d5b8608826595c777728cdf28ce7b5a5a8c8ac8d949beadcf2"}, + {file = "numpy-2.2.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ac9bea18d6d58a995fac1b2cb4488e17eceeac413af014b1dd26170b766d8467"}, + {file = "numpy-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:23ae9f0c2d889b7b2d88a3791f6c09e2ef827c2446f1c4a3e3e76328ee4afd9a"}, + {file = "numpy-2.2.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3074634ea4d6df66be04f6728ee1d173cfded75d002c75fac79503a880bf3825"}, + {file = "numpy-2.2.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:8ec0636d3f7d68520afc6ac2dc4b8341ddb725039de042faf0e311599f54eb37"}, + {file = "numpy-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ffbb1acd69fdf8e89dd60ef6182ca90a743620957afb7066385a7bbe88dc748"}, + {file = "numpy-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0349b025e15ea9d05c3d63f9657707a4e1d471128a3b1d876c095f328f8ff7f0"}, + {file = "numpy-2.2.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:463247edcee4a5537841d5350bc87fe8e92d7dd0e8c71c995d2c6eecb8208278"}, + {file = "numpy-2.2.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9dd47ff0cb2a656ad69c38da850df3454da88ee9a6fde0ba79acceee0e79daba"}, + {file = "numpy-2.2.2-cp312-cp312-win32.whl", hash = "sha256:4525b88c11906d5ab1b0ec1f290996c0020dd318af8b49acaa46f198b1ffc283"}, + {file = "numpy-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:5acea83b801e98541619af398cc0109ff48016955cc0818f478ee9ef1c5c3dcb"}, + {file = "numpy-2.2.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b208cfd4f5fe34e1535c08983a1a6803fdbc7a1e86cf13dd0c61de0b51a0aadc"}, + {file = "numpy-2.2.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d0bbe7dd86dca64854f4b6ce2ea5c60b51e36dfd597300057cf473d3615f2369"}, + {file = "numpy-2.2.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:22ea3bb552ade325530e72a0c557cdf2dea8914d3a5e1fecf58fa5dbcc6f43cd"}, + {file = "numpy-2.2.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:128c41c085cab8a85dc29e66ed88c05613dccf6bc28b3866cd16050a2f5448be"}, + {file = "numpy-2.2.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:250c16b277e3b809ac20d1f590716597481061b514223c7badb7a0f9993c7f84"}, + {file = "numpy-2.2.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0c8854b09bc4de7b041148d8550d3bd712b5c21ff6a8ed308085f190235d7ff"}, + {file = "numpy-2.2.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b6fb9c32a91ec32a689ec6410def76443e3c750e7cfc3fb2206b985ffb2b85f0"}, + {file = "numpy-2.2.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:57b4012e04cc12b78590a334907e01b3a85efb2107df2b8733ff1ed05fce71de"}, + {file = "numpy-2.2.2-cp313-cp313-win32.whl", hash = "sha256:4dbd80e453bd34bd003b16bd802fac70ad76bd463f81f0c518d1245b1c55e3d9"}, + {file = "numpy-2.2.2-cp313-cp313-win_amd64.whl", hash = "sha256:5a8c863ceacae696aff37d1fd636121f1a512117652e5dfb86031c8d84836369"}, + {file = "numpy-2.2.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:b3482cb7b3325faa5f6bc179649406058253d91ceda359c104dac0ad320e1391"}, + {file = "numpy-2.2.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9491100aba630910489c1d0158034e1c9a6546f0b1340f716d522dc103788e39"}, + {file = "numpy-2.2.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:41184c416143defa34cc8eb9d070b0a5ba4f13a0fa96a709e20584638254b317"}, + {file = "numpy-2.2.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:7dca87ca328f5ea7dafc907c5ec100d187911f94825f8700caac0b3f4c384b49"}, + {file = "numpy-2.2.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bc61b307655d1a7f9f4b043628b9f2b721e80839914ede634e3d485913e1fb2"}, + {file = "numpy-2.2.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fad446ad0bc886855ddf5909cbf8cb5d0faa637aaa6277fb4b19ade134ab3c7"}, + {file = "numpy-2.2.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:149d1113ac15005652e8d0d3f6fd599360e1a708a4f98e43c9c77834a28238cb"}, + {file = "numpy-2.2.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:106397dbbb1896f99e044efc90360d098b3335060375c26aa89c0d8a97c5f648"}, + {file = "numpy-2.2.2-cp313-cp313t-win32.whl", hash = "sha256:0eec19f8af947a61e968d5429f0bd92fec46d92b0008d0a6685b40d6adf8a4f4"}, + {file = "numpy-2.2.2-cp313-cp313t-win_amd64.whl", hash = "sha256:97b974d3ba0fb4612b77ed35d7627490e8e3dff56ab41454d9e8b23448940576"}, + {file = "numpy-2.2.2-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b0531f0b0e07643eb089df4c509d30d72c9ef40defa53e41363eca8a8cc61495"}, + {file = "numpy-2.2.2-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e9e82dcb3f2ebbc8cb5ce1102d5f1c5ed236bf8a11730fb45ba82e2841ec21df"}, + {file = "numpy-2.2.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0d4142eb40ca6f94539e4db929410f2a46052a0fe7a2c1c59f6179c39938d2a"}, + {file = "numpy-2.2.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:356ca982c188acbfa6af0d694284d8cf20e95b1c3d0aefa8929376fea9146f60"}, + {file = "numpy-2.2.2.tar.gz", hash = "sha256:ed6906f61834d687738d25988ae117683705636936cc605be0bb208b23df4d8f"}, ] [[package]] @@ -3839,10 +3823,10 @@ files = [ numpy = [ {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -3865,10 +3849,10 @@ files = [ numpy = [ {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""}, {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""}, + {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, - {version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""}, - {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, ] [[package]] @@ -3887,86 +3871,90 @@ et-xmlfile = "*" [[package]] name = "orjson" -version = "3.10.14" +version = "3.10.15" description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" optional = false python-versions = ">=3.8" files = [ - {file = "orjson-3.10.14-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:849ea7845a55f09965826e816cdc7689d6cf74fe9223d79d758c714af955bcb6"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5947b139dfa33f72eecc63f17e45230a97e741942955a6c9e650069305eb73d"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cde6d76910d3179dae70f164466692f4ea36da124d6fb1a61399ca589e81d69a"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6dfbaeb7afa77ca608a50e2770a0461177b63a99520d4928e27591b142c74b1"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fa45e489ef80f28ff0e5ba0a72812b8cfc7c1ef8b46a694723807d1b07c89ebb"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f5007abfdbb1d866e2aa8990bd1c465f0f6da71d19e695fc278282be12cffa5"}, - {file = "orjson-3.10.14-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1b49e2af011c84c3f2d541bb5cd1e3c7c2df672223e7e3ea608f09cf295e5f8a"}, - {file = "orjson-3.10.14-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:164ac155109226b3a2606ee6dda899ccfbe6e7e18b5bdc3fbc00f79cc074157d"}, - {file = "orjson-3.10.14-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6b1225024cf0ef5d15934b5ffe9baf860fe8bc68a796513f5ea4f5056de30bca"}, - {file = "orjson-3.10.14-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d6546e8073dc382e60fcae4a001a5a1bc46da5eab4a4878acc2d12072d6166d5"}, - {file = "orjson-3.10.14-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9f1d2942605c894162252d6259b0121bf1cb493071a1ea8cb35d79cb3e6ac5bc"}, - {file = "orjson-3.10.14-cp310-cp310-win32.whl", hash = "sha256:397083806abd51cf2b3bbbf6c347575374d160331a2d33c5823e22249ad3118b"}, - {file = "orjson-3.10.14-cp310-cp310-win_amd64.whl", hash = "sha256:fa18f949d3183a8d468367056be989666ac2bef3a72eece0bade9cdb733b3c28"}, - {file = "orjson-3.10.14-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:f506fd666dd1ecd15a832bebc66c4df45c1902fd47526292836c339f7ba665a9"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efe5fd254cfb0eeee13b8ef7ecb20f5d5a56ddda8a587f3852ab2cedfefdb5f6"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4ddc8c866d7467f5ee2991397d2ea94bcf60d0048bdd8ca555740b56f9042725"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3af8e42ae4363773658b8d578d56dedffb4f05ceeb4d1d4dd3fb504950b45526"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84dd83110503bc10e94322bf3ffab8bc49150176b49b4984dc1cce4c0a993bf9"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36f5bfc0399cd4811bf10ec7a759c7ab0cd18080956af8ee138097d5b5296a95"}, - {file = "orjson-3.10.14-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868943660fb2a1e6b6b965b74430c16a79320b665b28dd4511d15ad5038d37d5"}, - {file = "orjson-3.10.14-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:33449c67195969b1a677533dee9d76e006001213a24501333624623e13c7cc8e"}, - {file = "orjson-3.10.14-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:e4c9f60f9fb0b5be66e416dcd8c9d94c3eabff3801d875bdb1f8ffc12cf86905"}, - {file = "orjson-3.10.14-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0de4d6315cfdbd9ec803b945c23b3a68207fd47cbe43626036d97e8e9561a436"}, - {file = "orjson-3.10.14-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:83adda3db595cb1a7e2237029b3249c85afbe5c747d26b41b802e7482cb3933e"}, - {file = "orjson-3.10.14-cp311-cp311-win32.whl", hash = "sha256:998019ef74a4997a9d741b1473533cdb8faa31373afc9849b35129b4b8ec048d"}, - {file = "orjson-3.10.14-cp311-cp311-win_amd64.whl", hash = "sha256:9d034abdd36f0f0f2240f91492684e5043d46f290525d1117712d5b8137784eb"}, - {file = "orjson-3.10.14-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:2ad4b7e367efba6dc3f119c9a0fcd41908b7ec0399a696f3cdea7ec477441b09"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f496286fc85e93ce0f71cc84fc1c42de2decf1bf494094e188e27a53694777a7"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c7f189bbfcded40e41a6969c1068ba305850ba016665be71a217918931416fbf"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8cc8204f0b75606869c707da331058ddf085de29558b516fc43c73ee5ee2aadb"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deaa2899dff7f03ab667e2ec25842d233e2a6a9e333efa484dfe666403f3501c"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1c3ea52642c9714dc6e56de8a451a066f6d2707d273e07fe8a9cc1ba073813d"}, - {file = "orjson-3.10.14-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9d3f9ed72e7458ded9a1fb1b4d4ed4c4fdbaf82030ce3f9274b4dc1bff7ace2b"}, - {file = "orjson-3.10.14-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:07520685d408a2aba514c17ccc16199ff2934f9f9e28501e676c557f454a37fe"}, - {file = "orjson-3.10.14-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:76344269b550ea01488d19a2a369ab572c1ac4449a72e9f6ac0d70eb1cbfb953"}, - {file = "orjson-3.10.14-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e2979d0f2959990620f7e62da6cd954e4620ee815539bc57a8ae46e2dacf90e3"}, - {file = "orjson-3.10.14-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03f61ca3674555adcb1aa717b9fc87ae936aa7a63f6aba90a474a88701278780"}, - {file = "orjson-3.10.14-cp312-cp312-win32.whl", hash = "sha256:d5075c54edf1d6ad81d4c6523ce54a748ba1208b542e54b97d8a882ecd810fd1"}, - {file = "orjson-3.10.14-cp312-cp312-win_amd64.whl", hash = "sha256:175cafd322e458603e8ce73510a068d16b6e6f389c13f69bf16de0e843d7d406"}, - {file = "orjson-3.10.14-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:0905ca08a10f7e0e0c97d11359609300eb1437490a7f32bbaa349de757e2e0c7"}, - {file = "orjson-3.10.14-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92d13292249f9f2a3e418cbc307a9fbbef043c65f4bd8ba1eb620bc2aaba3d15"}, - {file = "orjson-3.10.14-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90937664e776ad316d64251e2fa2ad69265e4443067668e4727074fe39676414"}, - {file = "orjson-3.10.14-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9ed3d26c4cb4f6babaf791aa46a029265850e80ec2a566581f5c2ee1a14df4f1"}, - {file = "orjson-3.10.14-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:56ee546c2bbe9599aba78169f99d1dc33301853e897dbaf642d654248280dc6e"}, - {file = "orjson-3.10.14-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:901e826cb2f1bdc1fcef3ef59adf0c451e8f7c0b5deb26c1a933fb66fb505eae"}, - {file = "orjson-3.10.14-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:26336c0d4b2d44636e1e1e6ed1002f03c6aae4a8a9329561c8883f135e9ff010"}, - {file = "orjson-3.10.14-cp313-cp313-win32.whl", hash = "sha256:e2bc525e335a8545c4e48f84dd0328bc46158c9aaeb8a1c2276546e94540ea3d"}, - {file = "orjson-3.10.14-cp313-cp313-win_amd64.whl", hash = "sha256:eca04dfd792cedad53dc9a917da1a522486255360cb4e77619343a20d9f35364"}, - {file = "orjson-3.10.14-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9a0fba3b8a587a54c18585f077dcab6dd251c170d85cfa4d063d5746cd595a0f"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:175abf3d20e737fec47261d278f95031736a49d7832a09ab684026528c4d96db"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29ca1a93e035d570e8b791b6c0feddd403c6a5388bfe870bf2aa6bba1b9d9b8e"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f77202c80e8ab5a1d1e9faf642343bee5aaf332061e1ada4e9147dbd9eb00c46"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e2ec73b7099b6a29b40a62e08a23b936423bd35529f8f55c42e27acccde7954"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a2d1679df9f9cd9504f8dff24555c1eaabba8aad7f5914f28dab99e3c2552c9d"}, - {file = "orjson-3.10.14-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:691ab9a13834310a263664313e4f747ceb93662d14a8bdf20eb97d27ed488f16"}, - {file = "orjson-3.10.14-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:b11ed82054fce82fb74cea33247d825d05ad6a4015ecfc02af5fbce442fbf361"}, - {file = "orjson-3.10.14-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:e70a1d62b8288677d48f3bea66c21586a5f999c64ecd3878edb7393e8d1b548d"}, - {file = "orjson-3.10.14-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:16642f10c1ca5611251bd835de9914a4b03095e28a34c8ba6a5500b5074338bd"}, - {file = "orjson-3.10.14-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3871bad546aa66c155e3f36f99c459780c2a392d502a64e23fb96d9abf338511"}, - {file = "orjson-3.10.14-cp38-cp38-win32.whl", hash = "sha256:0293a88815e9bb5c90af4045f81ed364d982f955d12052d989d844d6c4e50945"}, - {file = "orjson-3.10.14-cp38-cp38-win_amd64.whl", hash = "sha256:6169d3868b190d6b21adc8e61f64e3db30f50559dfbdef34a1cd6c738d409dfc"}, - {file = "orjson-3.10.14-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:06d4ec218b1ec1467d8d64da4e123b4794c781b536203c309ca0f52819a16c03"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962c2ec0dcaf22b76dee9831fdf0c4a33d4bf9a257a2bc5d4adc00d5c8ad9034"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:21d3be4132f71ef1360385770474f29ea1538a242eef72ac4934fe142800e37f"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c28ed60597c149a9e3f5ad6dd9cebaee6fb2f0e3f2d159a4a2b9b862d4748860"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e947f70167fe18469f2023644e91ab3d24f9aed69a5e1c78e2c81b9cea553fb"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64410696c97a35af2432dea7bdc4ce32416458159430ef1b4beb79fd30093ad6"}, - {file = "orjson-3.10.14-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8050a5d81c022561ee29cd2739de5b4445f3c72f39423fde80a63299c1892c52"}, - {file = "orjson-3.10.14-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b49a28e30d3eca86db3fe6f9b7f4152fcacbb4a467953cd1b42b94b479b77956"}, - {file = "orjson-3.10.14-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:ca041ad20291a65d853a9523744eebc3f5a4b2f7634e99f8fe88320695ddf766"}, - {file = "orjson-3.10.14-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:d313a2998b74bb26e9e371851a173a9b9474764916f1fc7971095699b3c6e964"}, - {file = "orjson-3.10.14-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7796692136a67b3e301ef9052bde6fe8e7bd5200da766811a3a608ffa62aaff0"}, - {file = "orjson-3.10.14-cp39-cp39-win32.whl", hash = "sha256:eee4bc767f348fba485ed9dc576ca58b0a9eac237f0e160f7a59bce628ed06b3"}, - {file = "orjson-3.10.14-cp39-cp39-win_amd64.whl", hash = "sha256:96a1c0ee30fb113b3ae3c748fd75ca74a157ff4c58476c47db4d61518962a011"}, - {file = "orjson-3.10.14.tar.gz", hash = "sha256:cf31f6f071a6b8e7aa1ead1fa27b935b48d00fbfa6a28ce856cfff2d5dd68eed"}, + {file = "orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:552c883d03ad185f720d0c09583ebde257e41b9521b74ff40e08b7dec4559c04"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:616e3e8d438d02e4854f70bfdc03a6bcdb697358dbaa6bcd19cbe24d24ece1f8"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c2c79fa308e6edb0ffab0a31fd75a7841bf2a79a20ef08a3c6e3b26814c8ca8"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:73cb85490aa6bf98abd20607ab5c8324c0acb48d6da7863a51be48505646c814"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:763dadac05e4e9d2bc14938a45a2d0560549561287d41c465d3c58aec818b164"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a330b9b4734f09a623f74a7490db713695e13b67c959713b78369f26b3dee6bf"}, + {file = "orjson-3.10.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a61a4622b7ff861f019974f73d8165be1bd9a0855e1cad18ee167acacabeb061"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:acd271247691574416b3228db667b84775c497b245fa275c6ab90dc1ffbbd2b3"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:e4759b109c37f635aa5c5cc93a1b26927bfde24b254bcc0e1149a9fada253d2d"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e992fd5cfb8b9f00bfad2fd7a05a4299db2bbe92e6440d9dd2fab27655b3182"}, + {file = "orjson-3.10.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f95fb363d79366af56c3f26b71df40b9a583b07bbaaf5b317407c4d58497852e"}, + {file = "orjson-3.10.15-cp310-cp310-win32.whl", hash = "sha256:f9875f5fea7492da8ec2444839dcc439b0ef298978f311103d0b7dfd775898ab"}, + {file = "orjson-3.10.15-cp310-cp310-win_amd64.whl", hash = "sha256:17085a6aa91e1cd70ca8533989a18b5433e15d29c574582f76f821737c8d5806"}, + {file = "orjson-3.10.15-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:c4cc83960ab79a4031f3119cc4b1a1c627a3dc09df125b27c4201dff2af7eaa6"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ddbeef2481d895ab8be5185f2432c334d6dec1f5d1933a9c83014d188e102cef"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9e590a0477b23ecd5b0ac865b1b907b01b3c5535f5e8a8f6ab0e503efb896334"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6be38bd103d2fd9bdfa31c2720b23b5d47c6796bcb1d1b598e3924441b4298d"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ff4f6edb1578960ed628a3b998fa54d78d9bb3e2eb2cfc5c2a09732431c678d0"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0482b21d0462eddd67e7fce10b89e0b6ac56570424662b685a0d6fccf581e13"}, + {file = "orjson-3.10.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bb5cc3527036ae3d98b65e37b7986a918955f85332c1ee07f9d3f82f3a6899b5"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d569c1c462912acdd119ccbf719cf7102ea2c67dd03b99edcb1a3048651ac96b"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:1e6d33efab6b71d67f22bf2962895d3dc6f82a6273a965fab762e64fa90dc399"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c33be3795e299f565681d69852ac8c1bc5c84863c0b0030b2b3468843be90388"}, + {file = "orjson-3.10.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:eea80037b9fae5339b214f59308ef0589fc06dc870578b7cce6d71eb2096764c"}, + {file = "orjson-3.10.15-cp311-cp311-win32.whl", hash = "sha256:d5ac11b659fd798228a7adba3e37c010e0152b78b1982897020a8e019a94882e"}, + {file = "orjson-3.10.15-cp311-cp311-win_amd64.whl", hash = "sha256:cf45e0214c593660339ef63e875f32ddd5aa3b4adc15e662cdb80dc49e194f8e"}, + {file = "orjson-3.10.15-cp312-cp312-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9d11c0714fc85bfcf36ada1179400862da3288fc785c30e8297844c867d7505a"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dba5a1e85d554e3897fa9fe6fbcff2ed32d55008973ec9a2b992bd9a65d2352d"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7723ad949a0ea502df656948ddd8b392780a5beaa4c3b5f97e525191b102fff0"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6fd9bc64421e9fe9bd88039e7ce8e58d4fead67ca88e3a4014b143cec7684fd4"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dadba0e7b6594216c214ef7894c4bd5f08d7c0135f4dd0145600be4fbcc16767"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b48f59114fe318f33bbaee8ebeda696d8ccc94c9e90bc27dbe72153094e26f41"}, + {file = "orjson-3.10.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:035fb83585e0f15e076759b6fedaf0abb460d1765b6a36f48018a52858443514"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d13b7fe322d75bf84464b075eafd8e7dd9eae05649aa2a5354cfa32f43c59f17"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7066b74f9f259849629e0d04db6609db4cf5b973248f455ba5d3bd58a4daaa5b"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:88dc3f65a026bd3175eb157fea994fca6ac7c4c8579fc5a86fc2114ad05705b7"}, + {file = "orjson-3.10.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b342567e5465bd99faa559507fe45e33fc76b9fb868a63f1642c6bc0735ad02a"}, + {file = "orjson-3.10.15-cp312-cp312-win32.whl", hash = "sha256:0a4f27ea5617828e6b58922fdbec67b0aa4bb844e2d363b9244c47fa2180e665"}, + {file = "orjson-3.10.15-cp312-cp312-win_amd64.whl", hash = "sha256:ef5b87e7aa9545ddadd2309efe6824bd3dd64ac101c15dae0f2f597911d46eaa"}, + {file = "orjson-3.10.15-cp313-cp313-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:bae0e6ec2b7ba6895198cd981b7cca95d1487d0147c8ed751e5632ad16f031a6"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f93ce145b2db1252dd86af37d4165b6faa83072b46e3995ecc95d4b2301b725a"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7c203f6f969210128af3acae0ef9ea6aab9782939f45f6fe02d05958fe761ef9"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8918719572d662e18b8af66aef699d8c21072e54b6c82a3f8f6404c1f5ccd5e0"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f71eae9651465dff70aa80db92586ad5b92df46a9373ee55252109bb6b703307"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e117eb299a35f2634e25ed120c37c641398826c2f5a3d3cc39f5993b96171b9e"}, + {file = "orjson-3.10.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:13242f12d295e83c2955756a574ddd6741c81e5b99f2bef8ed8d53e47a01e4b7"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7946922ada8f3e0b7b958cc3eb22cfcf6c0df83d1fe5521b4a100103e3fa84c8"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:b7155eb1623347f0f22c38c9abdd738b287e39b9982e1da227503387b81b34ca"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:208beedfa807c922da4e81061dafa9c8489c6328934ca2a562efa707e049e561"}, + {file = "orjson-3.10.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eca81f83b1b8c07449e1d6ff7074e82e3fd6777e588f1a6632127f286a968825"}, + {file = "orjson-3.10.15-cp313-cp313-win32.whl", hash = "sha256:c03cd6eea1bd3b949d0d007c8d57049aa2b39bd49f58b4b2af571a5d3833d890"}, + {file = "orjson-3.10.15-cp313-cp313-win_amd64.whl", hash = "sha256:fd56a26a04f6ba5fb2045b0acc487a63162a958ed837648c5781e1fe3316cfbf"}, + {file = "orjson-3.10.15-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:5e8afd6200e12771467a1a44e5ad780614b86abb4b11862ec54861a82d677746"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da9a18c500f19273e9e104cca8c1f0b40a6470bcccfc33afcc088045d0bf5ea6"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb00b7bfbdf5d34a13180e4805d76b4567025da19a197645ca746fc2fb536586"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:33aedc3d903378e257047fee506f11e0833146ca3e57a1a1fb0ddb789876c1e1"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dd0099ae6aed5eb1fc84c9eb72b95505a3df4267e6962eb93cdd5af03be71c98"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c864a80a2d467d7786274fce0e4f93ef2a7ca4ff31f7fc5634225aaa4e9e98c"}, + {file = "orjson-3.10.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c25774c9e88a3e0013d7d1a6c8056926b607a61edd423b50eb5c88fd7f2823ae"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:e78c211d0074e783d824ce7bb85bf459f93a233eb67a5b5003498232ddfb0e8a"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:43e17289ffdbbac8f39243916c893d2ae41a2ea1a9cbb060a56a4d75286351ae"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:781d54657063f361e89714293c095f506c533582ee40a426cb6489c48a637b81"}, + {file = "orjson-3.10.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:6875210307d36c94873f553786a808af2788e362bd0cf4c8e66d976791e7b528"}, + {file = "orjson-3.10.15-cp38-cp38-win32.whl", hash = "sha256:305b38b2b8f8083cc3d618927d7f424349afce5975b316d33075ef0f73576b60"}, + {file = "orjson-3.10.15-cp38-cp38-win_amd64.whl", hash = "sha256:5dd9ef1639878cc3efffed349543cbf9372bdbd79f478615a1c633fe4e4180d1"}, + {file = "orjson-3.10.15-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:ffe19f3e8d68111e8644d4f4e267a069ca427926855582ff01fc012496d19969"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d433bf32a363823863a96561a555227c18a522a8217a6f9400f00ddc70139ae2"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:da03392674f59a95d03fa5fb9fe3a160b0511ad84b7a3914699ea5a1b3a38da2"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3a63bb41559b05360ded9132032239e47983a39b151af1201f07ec9370715c82"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3766ac4702f8f795ff3fa067968e806b4344af257011858cc3d6d8721588b53f"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a1c73dcc8fadbd7c55802d9aa093b36878d34a3b3222c41052ce6b0fc65f8e8"}, + {file = "orjson-3.10.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b299383825eafe642cbab34be762ccff9fd3408d72726a6b2a4506d410a71ab3"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:abc7abecdbf67a173ef1316036ebbf54ce400ef2300b4e26a7b843bd446c2480"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:3614ea508d522a621384c1d6639016a5a2e4f027f3e4a1c93a51867615d28829"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:295c70f9dc154307777ba30fe29ff15c1bcc9dfc5c48632f37d20a607e9ba85a"}, + {file = "orjson-3.10.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:63309e3ff924c62404923c80b9e2048c1f74ba4b615e7584584389ada50ed428"}, + {file = "orjson-3.10.15-cp39-cp39-win32.whl", hash = "sha256:a2f708c62d026fb5340788ba94a55c23df4e1869fec74be455e0b2f5363b8507"}, + {file = "orjson-3.10.15-cp39-cp39-win_amd64.whl", hash = "sha256:efcf6c735c3d22ef60c4aa27a5238f1a477df85e9b15f2142f9d669beb2d13fd"}, + {file = "orjson-3.10.15.tar.gz", hash = "sha256:05ca7fe452a2e9d8d9d706a2984c95b9c2ebc5db417ce0b7a49b91d50642a23e"}, ] [[package]] @@ -4049,8 +4037,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.22.4", markers = "python_version < \"3.11\""}, - {version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.26.0", markers = "python_version >= \"3.12\""}, + {version = ">=1.23.2", markers = "python_version == \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -4309,13 +4297,13 @@ virtualenv = ">=20.10.0" [[package]] name = "prompt-toolkit" -version = "3.0.48" +version = "3.0.50" description = "Library for building powerful interactive command lines in Python" optional = false -python-versions = ">=3.7.0" +python-versions = ">=3.8.0" files = [ - {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, - {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, + {file = "prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198"}, + {file = "prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab"}, ] [package.dependencies] @@ -4830,13 +4818,13 @@ testutils = ["gitpython (>3)"] [[package]] name = "pymdown-extensions" -version = "10.14" +version = "10.14.1" description = "Extension pack for Python Markdown." optional = false python-versions = ">=3.8" files = [ - {file = "pymdown_extensions-10.14-py3-none-any.whl", hash = "sha256:202481f716cc8250e4be8fce997781ebf7917701b59652458ee47f2401f818b5"}, - {file = "pymdown_extensions-10.14.tar.gz", hash = "sha256:741bd7c4ff961ba40b7528d32284c53bc436b8b1645e8e37c3e57770b8700a34"}, + {file = "pymdown_extensions-10.14.1-py3-none-any.whl", hash = "sha256:637951cbfbe9874ba28134fb3ce4b8bcadd6aca89ac4998ec29dcbafd554ae08"}, + {file = "pymdown_extensions-10.14.1.tar.gz", hash = "sha256:b65801996a0cd4f42a3110810c306c45b7313c09b0610a6f773730f2a9e3c96b"}, ] [package.dependencies] @@ -4848,13 +4836,13 @@ extra = ["pygments (>=2.19.1)"] [[package]] name = "pymilvus" -version = "2.5.3" +version = "2.5.4" description = "Python Sdk for Milvus" optional = false python-versions = ">=3.8" files = [ - {file = "pymilvus-2.5.3-py3-none-any.whl", hash = "sha256:64ca63594284586937274800be27a402f3be2d078130bf81d94ab8d7798ac9c8"}, - {file = "pymilvus-2.5.3.tar.gz", hash = "sha256:68bc3797b7a14c494caf116cee888894ffd6eba7b96a3ac841be85d60694cc5d"}, + {file = "pymilvus-2.5.4-py3-none-any.whl", hash = "sha256:3f7ddaeae0c8f63554b8e316b73f265d022e05a457d47c366ce47293434a3aea"}, + {file = "pymilvus-2.5.4.tar.gz", hash = "sha256:611732428ff669d57ded3d1f823bdeb10febf233d0251cce8498b287e5a10ce8"}, ] [package.dependencies] @@ -6986,13 +6974,13 @@ vision = ["Pillow (>=10.0.1,<=15.0)"] [[package]] name = "transformers" -version = "4.48.0" +version = "4.48.1" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" files = [ - {file = "transformers-4.48.0-py3-none-any.whl", hash = "sha256:6d3de6d71cb5f2a10f9775ccc17abce9620195caaf32ec96542bd2a6937f25b0"}, - {file = "transformers-4.48.0.tar.gz", hash = "sha256:03fdfcbfb8b0367fb6c9fbe9d1c9aa54dfd847618be9b52400b2811d22799cb1"}, + {file = "transformers-4.48.1-py3-none-any.whl", hash = "sha256:24be0564b0a36d9e433d9a65de248f1545b6f6edce1737669605eb6a8141bbbb"}, + {file = "transformers-4.48.1.tar.gz", hash = "sha256:7c1931facc3ee8adcbf86fc7a87461d54c1e40eca3bb57fef1ee9f3ecd32187e"}, ] [package.dependencies] @@ -7187,13 +7175,13 @@ files = [ [[package]] name = "tzdata" -version = "2024.2" +version = "2025.1" description = "Provider of IANA time zone data" optional = false python-versions = ">=2" files = [ - {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, - {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, + {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, + {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, ] [[package]] @@ -7302,13 +7290,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "virtualenv" -version = "20.29.0" +version = "20.29.1" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.29.0-py3-none-any.whl", hash = "sha256:c12311863497992dc4b8644f8ea82d3b35bb7ef8ee82e6630d76d0197c39baf9"}, - {file = "virtualenv-20.29.0.tar.gz", hash = "sha256:6345e1ff19d4b1296954cee076baaf58ff2a12a84a338c62b02eda39f20aa982"}, + {file = "virtualenv-20.29.1-py3-none-any.whl", hash = "sha256:4e4cb403c0b0da39e13b46b1b2476e505cb0046b25f242bee80f62bf990b2779"}, + {file = "virtualenv-20.29.1.tar.gz", hash = "sha256:b8b8970138d32fb606192cb97f6cd4bb644fa486be9308fb9b63f81091b5dc35"}, ] [package.dependencies] @@ -7763,4 +7751,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "0c0bb9470144867d7f11dec7c3020ac3d4d9ef18b355c0852a9cb89b6270981d" +content-hash = "8bb0b67294a50c0340c5cc02ce60d3608ef4d1968ae50f7e0b8b4c8a26c34734" diff --git a/pyproject.toml b/pyproject.toml index 63f6016..c3e1fa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,9 +25,9 @@ packages = [{include = "docling"}] # actual dependencies: ###################### python = "^3.9" -docling-core = { version = "^2.13.1", extras = ["chunking"] } pydantic = "^2.0.0" -docling-ibm-models = "^3.1.0" +docling-core = { version = "^2.15.1", extras = ["chunking"] } +docling-ibm-models = "^3.2.1" deepsearch-glm = "^1.0.0" docling-parse = "^3.1.0" filetype = "^1.2.0" diff --git a/tests/data/code_and_formula.pdf b/tests/data/code_and_formula.pdf new file mode 100644 index 0000000..82cd834 Binary files /dev/null and b/tests/data/code_and_formula.pdf differ diff --git a/tests/data/groundtruth/docling_v1/code_and_formula.doctags.txt b/tests/data/groundtruth/docling_v1/code_and_formula.doctags.txt new file mode 100644 index 0000000..1f1a0db --- /dev/null +++ b/tests/data/groundtruth/docling_v1/code_and_formula.doctags.txt @@ -0,0 +1,13 @@ + +Java Code Example +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Listing 1: Simple Java Program +public static void print() { System.out.println( "Java Code" ); } +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Formula +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt. +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. + \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v1/code_and_formula.json b/tests/data/groundtruth/docling_v1/code_and_formula.json new file mode 100644 index 0000000..8b90e79 --- /dev/null +++ b/tests/data/groundtruth/docling_v1/code_and_formula.json @@ -0,0 +1 @@ +{"_name": "", "type": "pdf-document", "description": {"title": null, "abstract": null, "authors": null, "affiliations": null, "subjects": null, "keywords": null, "publication_date": null, "languages": null, "license": null, "publishers": null, "url_refs": null, "references": null, "publication": null, "reference_count": null, "citation_count": null, "citation_date": null, "advanced": null, "analytics": null, "logs": [], "collection": null, "acquisition": null}, "file-info": {"filename": "code_and_formula.pdf", "filename-prov": null, "document-hash": "71dd5c52aa241a6d5275d5f681023b6bb3dee3346e171075d1247c87d49e7a82", "#-pages": 2, "collection-name": null, "description": null, "page-hashes": [{"hash": "d3280840ba2dff9749506fce3f05166336b768d2096f6b332fcbfacbc0540edf", "model": "default", "page": 1}, {"hash": "e9aac72f810d71aeb3710b13689d8c55e6eeda390fb497f09e865f1e94f74f2b", "model": "default", "page": 2}]}, "main-text": [{"prov": [{"bbox": [133.76800537109375, 653.6340942382812, 273.4540100097656, 667.99462890625], "page": 1, "span": [0, 17], "__ref_s3_data": null}], "text": "Java Code Example", "type": "subtitle-level-1", "payload": null, "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.76800537109375, 501.4163513183594, 477.48065185546875, 642.8859252929688], "page": 1, "span": [0, 887], "__ref_s3_data": null}], "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [236.17599487304688, 480.4953308105469, 375.069580078125, 490.45794677734375], "page": 1, "span": [0, 30], "__ref_s3_data": null}], "text": "Listing 1: Simple Java Program", "type": "paragraph", "payload": null, "name": "paragraph", "font": null}, {"prov": [{"bbox": [134.23899841308594, 443.9358215332031, 337.5928649902344, 474.2005310058594], "page": 1, "span": [0, 65], "__ref_s3_data": null}], "text": "public static void print() { System.out.println( \"Java Code\" ); }", "type": "paragraph", "payload": null, "name": "Code", "font": null}, {"prov": [{"bbox": [133.76800537109375, 290.80633544921875, 477.47589111328125, 432.27593994140625], "page": 1, "span": [0, 887], "__ref_s3_data": null}], "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [303.13299560546875, 86.87435150146484, 308.1142883300781, 96.83694458007812], "page": 1, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-footer", "payload": null, "name": "Page-footer", "font": null}, {"prov": [{"bbox": [133.76800537109375, 703.5241088867188, 191.51429748535156, 717.8846435546875], "page": 2, "span": [0, 7], "__ref_s3_data": null}], "text": "Formula", "type": "subtitle-level-1", "payload": null, "name": "Section-header", "font": null}, {"prov": [{"bbox": [133.76800537109375, 551.3063354492188, 477.48065185546875, 692.7759399414062], "page": 2, "span": [0, 887], "__ref_s3_data": null}], "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [133.76800537109375, 491.53033447265625, 477.4748229980469, 549.3139038085938], "page": 2, "span": [0, 369], "__ref_s3_data": null}], "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [280.5539855957031, 467.6203308105469, 330.69659423828125, 479.4553527832031], "page": 2, "span": [0, 12], "__ref_s3_data": null}], "text": "a 2 + 8 = 12", "type": "equation", "payload": null, "name": "Formula", "font": null}, {"prov": [{"bbox": [133.76800537109375, 318.1803283691406, 477.47589111328125, 459.64996337890625], "page": 2, "span": [0, 887], "__ref_s3_data": null}], "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [133.76800537109375, 246.44935607910156, 477.4748229980469, 316.1879577636719], "page": 2, "span": [0, 415], "__ref_s3_data": null}], "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [133.76800537109375, 174.71835327148438, 477.4748229980469, 244.4569549560547], "page": 2, "span": [0, 415], "__ref_s3_data": null}], "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.", "type": "paragraph", "payload": null, "name": "Text", "font": null}, {"prov": [{"bbox": [303.13299560546875, 136.7633514404297, 308.1142883300781, 146.7259521484375], "page": 2, "span": [0, 1], "__ref_s3_data": null}], "text": "1", "type": "page-footer", "payload": null, "name": "Page-footer", "font": null}], "figures": [], "tables": [], "bitmaps": null, "equations": [], "footnotes": [], "page-dimensions": [{"height": 792.0, "page": 1, "width": 612.0}, {"height": 841.8900146484375, "page": 2, "width": 595.2760009765625}], "page-footers": [], "page-headers": [], "_s3_data": null, "identifiers": null} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v1/code_and_formula.md b/tests/data/groundtruth/docling_v1/code_and_formula.md new file mode 100644 index 0000000..4d77dc5 --- /dev/null +++ b/tests/data/groundtruth/docling_v1/code_and_formula.md @@ -0,0 +1,19 @@ +## Java Code Example + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Listing 1: Simple Java Program + +public static void print() { System.out.println( "Java Code" ); } + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +## Formula + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt. + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v1/code_and_formula.pages.json b/tests/data/groundtruth/docling_v1/code_and_formula.pages.json new file mode 100644 index 0000000..27682fd --- /dev/null +++ b/tests/data/groundtruth/docling_v1/code_and_formula.pages.json @@ -0,0 +1 @@ +[{"page_no": 0, "size": {"width": 612.0, "height": 792.0}, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}, {"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "section_header", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Java Code Example"}, {"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "caption", "id": 5, "page_no": 0, "cluster": {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Listing 1: Simple Java Program"}, {"label": "code", "id": 4, "page_no": 0, "cluster": {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "public static void print() { System.out.println( \"Java Code\" ); }"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "page_footer", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}], "body": [{"label": "section_header", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Java Code Example"}, {"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "caption", "id": 5, "page_no": 0, "cluster": {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Listing 1: Simple Java Program"}, {"label": "code", "id": 4, "page_no": 0, "cluster": {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "public static void print() { System.out.println( \"Java Code\" ); }"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}], "headers": [{"label": "page_footer", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}]}}, {"page_no": 1, "size": {"width": 595.2760009765625, "height": 841.8900146484375}, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}, {"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}, {"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}, {"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}, {"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "section_header", "id": 5, "page_no": 1, "cluster": {"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Formula"}, {"label": "text", "id": 1, "page_no": 1, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 3, "page_no": 1, "cluster": {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt."}, {"label": "formula", "id": 6, "page_no": 1, "cluster": {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "a 2 + 8 = 12"}, {"label": "text", "id": 0, "page_no": 1, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 4, "page_no": 1, "cluster": {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "text", "id": 2, "page_no": 1, "cluster": {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "page_footer", "id": 7, "page_no": 1, "cluster": {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}], "body": [{"label": "section_header", "id": 5, "page_no": 1, "cluster": {"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Formula"}, {"label": "text", "id": 1, "page_no": 1, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 3, "page_no": 1, "cluster": {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt."}, {"label": "formula", "id": 6, "page_no": 1, "cluster": {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "a 2 + 8 = 12"}, {"label": "text", "id": 0, "page_no": 1, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 4, "page_no": 1, "cluster": {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "text", "id": 2, "page_no": 1, "cluster": {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}], "headers": [{"label": "page_footer", "id": 7, "page_no": 1, "cluster": {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}]}}] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/code_and_formula.doctags.txt b/tests/data/groundtruth/docling_v2/code_and_formula.doctags.txt new file mode 100644 index 0000000..ad41754 --- /dev/null +++ b/tests/data/groundtruth/docling_v2/code_and_formula.doctags.txt @@ -0,0 +1,14 @@ + +Java Code Example +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Listing 1: Simple Java Program +public static void print() { System.out.println( "Java Code" ); } +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Formula +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt. +a 2 + 8 = 12 +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. + \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/code_and_formula.json b/tests/data/groundtruth/docling_v2/code_and_formula.json new file mode 100644 index 0000000..adcc14d --- /dev/null +++ b/tests/data/groundtruth/docling_v2/code_and_formula.json @@ -0,0 +1 @@ +{"schema_name": "DoclingDocument", "version": "1.0.0", "name": "code_and_formula", "origin": {"mimetype": "application/pdf", "binary_hash": 2394749058180317456, "filename": "code_and_formula.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}, {"cref": "#/texts/1"}, {"cref": "#/texts/2"}, {"cref": "#/texts/3"}, {"cref": "#/texts/4"}, {"cref": "#/texts/5"}, {"cref": "#/texts/6"}, {"cref": "#/texts/7"}, {"cref": "#/texts/8"}, {"cref": "#/texts/9"}, {"cref": "#/texts/10"}, {"cref": "#/texts/11"}, {"cref": "#/texts/12"}, {"cref": "#/texts/13"}], "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 1, "bbox": {"l": 133.76800537109375, "t": 667.99462890625, "r": 273.4540100097656, "b": 653.6340942382812, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 17]}], "orig": "Java Code Example", "text": "Java Code Example", "level": 1}, {"self_ref": "#/texts/1", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 133.76800537109375, "t": 642.8859252929688, "r": 477.48065185546875, "b": 501.4163513183594, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 887]}], "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"self_ref": "#/texts/2", "parent": {"cref": "#/body"}, "children": [], "label": "paragraph", "prov": [{"page_no": 1, "bbox": {"l": 236.17599487304688, "t": 490.45794677734375, "r": 375.069580078125, "b": 480.4953308105469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 30]}], "orig": "Listing 1: Simple Java Program", "text": "Listing 1: Simple Java Program"}, {"self_ref": "#/texts/3", "parent": {"cref": "#/body"}, "children": [], "label": "code", "prov": [{"page_no": 1, "bbox": {"l": 134.23899841308594, "t": 474.2005310058594, "r": 337.5928649902344, "b": 443.9358215332031, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 65]}], "orig": "public static void print() { System.out.println( \"Java Code\" ); }", "text": "public static void print() { System.out.println( \"Java Code\" ); }", "code_language": "unknown"}, {"self_ref": "#/texts/4", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 133.76800537109375, "t": 432.27593994140625, "r": 477.47589111328125, "b": 290.80633544921875, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 887]}], "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"self_ref": "#/texts/5", "parent": {"cref": "#/body"}, "children": [], "label": "page_footer", "prov": [{"page_no": 1, "bbox": {"l": 303.13299560546875, "t": 96.83694458007812, "r": 308.1142883300781, "b": 86.87435150146484, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}, {"self_ref": "#/texts/6", "parent": {"cref": "#/body"}, "children": [], "label": "section_header", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 717.8846435546875, "r": 191.51429748535156, "b": 703.5241088867188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 7]}], "orig": "Formula", "text": "Formula", "level": 1}, {"self_ref": "#/texts/7", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 692.7759399414062, "r": 477.48065185546875, "b": 551.3063354492188, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 887]}], "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"self_ref": "#/texts/8", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 549.3139038085938, "r": 477.4748229980469, "b": 491.53033447265625, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 369]}], "orig": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt.", "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt."}, {"self_ref": "#/texts/9", "parent": {"cref": "#/body"}, "children": [], "label": "formula", "prov": [{"page_no": 2, "bbox": {"l": 280.5539855957031, "t": 479.4553527832031, "r": 330.69659423828125, "b": 467.6203308105469, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 12]}], "orig": "a 2 + 8 = 12", "text": "a 2 + 8 = 12"}, {"self_ref": "#/texts/10", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 459.64996337890625, "r": 477.47589111328125, "b": 318.1803283691406, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 887]}], "orig": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.", "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"self_ref": "#/texts/11", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 316.1879577636719, "r": 477.4748229980469, "b": 246.44935607910156, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 415]}], "orig": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.", "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"self_ref": "#/texts/12", "parent": {"cref": "#/body"}, "children": [], "label": "text", "prov": [{"page_no": 2, "bbox": {"l": 133.76800537109375, "t": 244.4569549560547, "r": 477.4748229980469, "b": 174.71835327148438, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 415]}], "orig": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat.", "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"self_ref": "#/texts/13", "parent": {"cref": "#/body"}, "children": [], "label": "page_footer", "prov": [{"page_no": 2, "bbox": {"l": 303.13299560546875, "t": 146.7259521484375, "r": 308.1142883300781, "b": 136.7633514404297, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 1]}], "orig": "1", "text": "1"}], "pictures": [], "tables": [], "key_value_items": [], "pages": {"1": {"size": {"width": 612.0, "height": 792.0}, "image": null, "page_no": 1}, "2": {"size": {"width": 595.2760009765625, "height": 841.8900146484375}, "image": null, "page_no": 2}}} \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/code_and_formula.md b/tests/data/groundtruth/docling_v2/code_and_formula.md new file mode 100644 index 0000000..2e2636a --- /dev/null +++ b/tests/data/groundtruth/docling_v2/code_and_formula.md @@ -0,0 +1,25 @@ +## Java Code Example + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Listing 1: Simple Java Program + +``` +public static void print() { System.out.println( "Java Code" ); } +``` + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +## Formula + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt. + +a 2 + 8 = 12 + +Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. + +Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat. \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/code_and_formula.pages.json b/tests/data/groundtruth/docling_v2/code_and_formula.pages.json new file mode 100644 index 0000000..27682fd --- /dev/null +++ b/tests/data/groundtruth/docling_v2/code_and_formula.pages.json @@ -0,0 +1 @@ +[{"page_no": 0, "size": {"width": 612.0, "height": 792.0}, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}, {"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "section_header", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Java Code Example"}, {"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "caption", "id": 5, "page_no": 0, "cluster": {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Listing 1: Simple Java Program"}, {"label": "code", "id": 4, "page_no": 0, "cluster": {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "public static void print() { System.out.println( \"Java Code\" ); }"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "page_footer", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}], "body": [{"label": "section_header", "id": 2, "page_no": 0, "cluster": {"id": 2, "label": "section_header", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}, "confidence": 0.9581764936447144, "cells": [{"id": 0, "text": "Java Code Example", "bbox": {"l": 133.76801, "t": 124.00536999999997, "r": 273.45401, "b": 138.36590999999999, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Java Code Example"}, {"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.4806500000001, "b": 290.58365, "coord_origin": "TOPLEFT"}, "confidence": 0.9872456789016724, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801, "t": 149.11406999999997, "r": 477.47687, "b": 159.07665999999995, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801, "t": 161.06908999999996, "r": 477.47781000000003, "b": 171.03168000000005, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801, "t": 173.02405, "r": 477.46985, "b": 182.98663, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801, "t": 184.97906, "r": 477.47186, "b": 194.94164999999998, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801, "t": 196.93408, "r": 477.46993999999995, "b": 206.89666999999997, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801, "t": 208.88909999999998, "r": 477.4806500000001, "b": 218.85168, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801, "t": 220.84509000000003, "r": 355.20087, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.60107, "t": 220.84509000000003, "r": 477.47495000000004, "b": 230.80768, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 232.80005000000006, "r": 477.47083, "b": 242.76262999999994, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 244.75507000000005, "r": 477.47092, "b": 254.71765000000005, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 256.71007999999995, "r": 477.46588, "b": 266.67267000000004, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 268.66510000000005, "r": 477.47586, "b": 278.62769000000003, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 280.62103, "r": 351.47672, "b": 290.58365, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "caption", "id": 5, "page_no": 0, "cluster": {"id": 5, "label": "caption", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}, "confidence": 0.5425266027450562, "cells": [{"id": 14, "text": "Listing 1: Simple Java Program", "bbox": {"l": 236.17598999999998, "t": 301.54204999999996, "r": 375.06958, "b": 311.50467, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Listing 1: Simple Java Program"}, {"label": "code", "id": 4, "page_no": 0, "cluster": {"id": 4, "label": "code", "bbox": {"l": 134.239, "t": 317.79947000000004, "r": 337.59286, "b": 348.06418, "coord_origin": "TOPLEFT"}, "confidence": 0.6383119821548462, "cells": [{"id": 15, "text": "public static void", "bbox": {"l": 134.575, "t": 317.79947000000004, "r": 235.44695, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "print() {", "bbox": {"l": 241.8759, "t": 317.79947000000004, "r": 292.24908, "b": 326.14719, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "System.out.println(", "bbox": {"l": 157.17101, "t": 328.75745, "r": 264.02368, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "\"Java Code\"", "bbox": {"l": 264.15811, "t": 328.75745, "r": 325.35376, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": ");", "bbox": {"l": 326.2952, "t": 328.75745, "r": 337.59286, "b": 337.10516000000007, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "}", "bbox": {"l": 134.239, "t": 339.71646, "r": 138.94637, "b": 348.06418, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "public static void print() { System.out.println( \"Java Code\" ); }"}, {"label": "text", "id": 1, "page_no": 0, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801, "t": 359.72406, "r": 477.47589, "b": 501.19366, "coord_origin": "TOPLEFT"}, "confidence": 0.9869542717933655, "cells": [{"id": 21, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201, "t": 359.72406, "r": 477.46985, "b": 369.68667999999997, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801, "t": 371.67905, "r": 477.4758, "b": 381.64166000000006, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801, "t": 383.63403, "r": 477.4758, "b": 393.59665, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801, "t": 395.58905, "r": 477.47284, "b": 405.55167, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801, "t": 407.54404, "r": 477.47589, "b": 417.50665, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801, "t": 419.50003000000004, "r": 477.47177000000005, "b": 429.46265, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801, "t": 431.45505, "r": 477.47387999999995, "b": 441.41766000000007, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801, "t": 443.41003, "r": 477.47083, "b": 453.37265, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801, "t": 455.36505, "r": 477.47092, "b": 465.32767, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801, "t": 467.32004, "r": 477.46585, "b": 477.28265, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801, "t": 479.27603, "r": 477.47586, "b": 489.23865, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801, "t": 491.23105, "r": 351.47672, "b": 501.19366, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}], "headers": [{"label": "page_footer", "id": 3, "page_no": 0, "cluster": {"id": 3, "label": "page_footer", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}, "confidence": 0.8374724984169006, "cells": [{"id": 33, "text": "1", "bbox": {"l": 303.133, "t": 695.163055, "r": 308.11429, "b": 705.125648, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}]}}, {"page_no": 1, "size": {"width": 595.2760009765625, "height": 841.8900146484375}, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}, {"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}, {"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}, {"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}, {"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}, {"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}, {"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "predictions": {"layout": {"clusters": [{"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "section_header", "id": 5, "page_no": 1, "cluster": {"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Formula"}, {"label": "text", "id": 1, "page_no": 1, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 3, "page_no": 1, "cluster": {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt."}, {"label": "formula", "id": 6, "page_no": 1, "cluster": {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "a 2 + 8 = 12"}, {"label": "text", "id": 0, "page_no": 1, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 4, "page_no": 1, "cluster": {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "text", "id": 2, "page_no": 1, "cluster": {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "page_footer", "id": 7, "page_no": 1, "cluster": {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}], "body": [{"label": "section_header", "id": 5, "page_no": 1, "cluster": {"id": 5, "label": "section_header", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}, "confidence": 0.95060133934021, "cells": [{"id": 0, "text": "Formula", "bbox": {"l": 133.76801021944917, "t": 124.00537068468714, "r": 191.51430031418315, "b": 138.36590076397772, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Formula"}, {"label": "text", "id": 1, "page_no": 1, "cluster": {"id": 1, "label": "text", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.4806507833169, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}, "confidence": 0.9878177046775818, "cells": [{"id": 1, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eir-", "bbox": {"l": 133.76801021944917, "t": 149.114070823323, "r": 477.47687078331063, "b": 159.0766608783307, "coord_origin": "TOPLEFT"}}, {"id": 2, "text": "mod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam volup-", "bbox": {"l": 133.76801021944917, "t": 161.06909088933185, "r": 477.4778107833122, "b": 171.03167094433945, "coord_origin": "TOPLEFT"}}, {"id": 3, "text": "tua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd", "bbox": {"l": 133.76801021944917, "t": 173.0241009553406, "r": 477.46985078329914, "b": 182.9866910103483, "coord_origin": "TOPLEFT"}}, {"id": 4, "text": "gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ip-", "bbox": {"l": 133.76801021944917, "t": 184.97906102134914, "r": 477.4718607833024, "b": 194.94165107635683, "coord_origin": "TOPLEFT"}}, {"id": 5, "text": "sum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor", "bbox": {"l": 133.76801021944917, "t": 196.93511108736357, "r": 477.4699407832993, "b": 206.89770114237137, "coord_origin": "TOPLEFT"}}, {"id": 6, "text": "invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero", "bbox": {"l": 133.76801021944917, "t": 208.890071153372, "r": 477.4806507833169, "b": 218.8526612083799, "coord_origin": "TOPLEFT"}}, {"id": 7, "text": "eos et accusam et justo duo dolores et ea rebum.", "bbox": {"l": 133.76801021944917, "t": 220.84509121938083, "r": 355.20087058271434, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 8, "text": "Stet clita kasd gubergren,", "bbox": {"l": 362.6010705948545, "t": 220.84509121938083, "r": 477.4749507833075, "b": 230.80767127438855, "coord_origin": "TOPLEFT"}}, {"id": 9, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 232.8001112853898, "r": 477.4708307833007, "b": 242.76269134039728, "coord_origin": "TOPLEFT"}}, {"id": 10, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 244.75506135139813, "r": 477.4709207833008, "b": 254.71765140640582, "coord_origin": "TOPLEFT"}}, {"id": 11, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 256.71008141740685, "r": 477.4658807832926, "b": 266.6726614724146, "coord_origin": "TOPLEFT"}}, {"id": 12, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 268.6660714834211, "r": 477.47586078330903, "b": 278.6286615384289, "coord_origin": "TOPLEFT"}}, {"id": 13, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 280.62109154942993, "r": 351.4767205766048, "b": 290.58367160443765, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 3, "page_no": 1, "cluster": {"id": 3, "label": "text", "bbox": {"l": 133.76801021944917, "t": 292.5761116154388, "r": 477.47482078330734, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}, "confidence": 0.9855858683586121, "cells": [{"id": 14, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 292.5761116154388, "r": 477.4710707833011, "b": 302.5386916704465, "coord_origin": "TOPLEFT"}}, {"id": 15, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 304.5310616814471, "r": 477.47293078330415, "b": 314.4936517364549, "coord_origin": "TOPLEFT"}}, {"id": 16, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 316.48608174745607, "r": 477.47482078330734, "b": 326.4486618024637, "coord_origin": "TOPLEFT"}}, {"id": 17, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 328.4411018134648, "r": 477.472810783304, "b": 338.40368186847246, "coord_origin": "TOPLEFT"}}, {"id": 18, "text": "sed diam nonummy nibh euismod tincidunt.", "bbox": {"l": 133.76801021944917, "t": 340.3970618794788, "r": 326.20752053515014, "b": 350.3596819344868, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt."}, {"label": "formula", "id": 6, "page_no": 1, "cluster": {"id": 6, "label": "formula", "bbox": {"l": 280.5539904602546, "t": 362.4346620011579, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}, "confidence": 0.8711639046669006, "cells": [{"id": 19, "text": "a", "bbox": {"l": 280.5539904602546, "t": 364.3070620114962, "r": 285.8142404688841, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}, {"id": 20, "text": "2", "bbox": {"l": 285.8200104688936, "t": 362.4346620011579, "r": 289.78809047540335, "b": 369.40844203966316, "coord_origin": "TOPLEFT"}}, {"id": 21, "text": "+ 8 = 12", "bbox": {"l": 292.50400047985886, "t": 364.3070620114962, "r": 330.6965905425146, "b": 374.2696820665041, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "a 2 + 8 = 12"}, {"label": "text", "id": 0, "page_no": 1, "cluster": {"id": 0, "label": "text", "bbox": {"l": 133.76801021944917, "t": 382.24005211051195, "r": 477.47589078330907, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}, "confidence": 0.9880034923553467, "cells": [{"id": 22, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy", "bbox": {"l": 148.71201024396512, "t": 382.24005211051195, "r": 477.4698807832991, "b": 392.2026621655199, "coord_origin": "TOPLEFT"}}, {"id": 23, "text": "eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam", "bbox": {"l": 133.76801021944917, "t": 394.19506217652076, "r": 477.47580078330884, "b": 404.15768223152867, "coord_origin": "TOPLEFT"}}, {"id": 24, "text": "voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita", "bbox": {"l": 133.76801021944917, "t": 406.15005224252945, "r": 477.47580078330884, "b": 416.1126722975373, "coord_origin": "TOPLEFT"}}, {"id": 25, "text": "kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem", "bbox": {"l": 133.76801021944917, "t": 418.10507230853824, "r": 477.47284078330404, "b": 428.06768236354606, "coord_origin": "TOPLEFT"}}, {"id": 26, "text": "ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod", "bbox": {"l": 133.76801021944917, "t": 430.0600523745468, "r": 477.47589078330907, "b": 440.0226724295547, "coord_origin": "TOPLEFT"}}, {"id": 27, "text": "tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At", "bbox": {"l": 133.76801021944917, "t": 442.01605244056105, "r": 477.4717707833023, "b": 451.97866249556887, "coord_origin": "TOPLEFT"}}, {"id": 28, "text": "vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren,", "bbox": {"l": 133.76801021944917, "t": 453.9710625065698, "r": 477.47388078330573, "b": 463.93368256157777, "coord_origin": "TOPLEFT"}}, {"id": 29, "text": "no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor", "bbox": {"l": 133.76801021944917, "t": 465.9260525725785, "r": 477.4708307833007, "b": 475.88867262758635, "coord_origin": "TOPLEFT"}}, {"id": 30, "text": "sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt", "bbox": {"l": 133.76801021944917, "t": 477.8810726385873, "r": 477.4709207833008, "b": 487.84368269359516, "coord_origin": "TOPLEFT"}}, {"id": 31, "text": "ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et", "bbox": {"l": 133.76801021944917, "t": 489.8360527045959, "r": 477.4658807832926, "b": 499.7986727596038, "coord_origin": "TOPLEFT"}}, {"id": 32, "text": "accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea", "bbox": {"l": 133.76801021944917, "t": 501.7910427706045, "r": 477.47586078330903, "b": 511.7536628256125, "coord_origin": "TOPLEFT"}}, {"id": 33, "text": "takimata sanctus est Lorem ipsum dolor sit amet.", "bbox": {"l": 133.76801021944917, "t": 513.7470728366188, "r": 351.4767205766048, "b": 523.7096828916267, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet."}, {"label": "text", "id": 4, "page_no": 1, "cluster": {"id": 4, "label": "text", "bbox": {"l": 133.76801021944917, "t": 525.7020529026275, "r": 477.47482078330734, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}, "confidence": 0.9844803214073181, "cells": [{"id": 34, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 525.7020529026275, "r": 477.4710707833011, "b": 535.6646729576354, "coord_origin": "TOPLEFT"}}, {"id": 35, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 537.6570729686364, "r": 477.47293078330415, "b": 547.6196930236442, "coord_origin": "TOPLEFT"}}, {"id": 36, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 549.612063034645, "r": 477.47482078330734, "b": 559.5746730896528, "coord_origin": "TOPLEFT"}}, {"id": 37, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 561.5670431006536, "r": 477.472810783304, "b": 571.5296631556615, "coord_origin": "TOPLEFT"}}, {"id": 38, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 573.5220631666624, "r": 477.47479078330724, "b": 583.4846832216704, "coord_origin": "TOPLEFT"}}, {"id": 39, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 585.4780532326765, "r": 173.0565202839028, "b": 595.4406532876844, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}, {"label": "text", "id": 2, "page_no": 1, "cluster": {"id": 2, "label": "text", "bbox": {"l": 133.76801021944917, "t": 597.4330632986853, "r": 477.47482078330734, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}, "confidence": 0.9872003793716431, "cells": [{"id": 40, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie", "bbox": {"l": 148.71201024396512, "t": 597.4330632986853, "r": 477.4710707833011, "b": 607.3956633536932, "coord_origin": "TOPLEFT"}}, {"id": 41, "text": "consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et", "bbox": {"l": 133.76801021944917, "t": 609.3880633646941, "r": 477.47293078330415, "b": 619.3506634197018, "coord_origin": "TOPLEFT"}}, {"id": 42, "text": "iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore", "bbox": {"l": 133.76801021944917, "t": 621.3430634307027, "r": 477.47482078330734, "b": 631.3056634857105, "coord_origin": "TOPLEFT"}}, {"id": 43, "text": "te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit,", "bbox": {"l": 133.76801021944917, "t": 633.2980634967114, "r": 477.472810783304, "b": 643.2606635517193, "coord_origin": "TOPLEFT"}}, {"id": 44, "text": "sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat", "bbox": {"l": 133.76801021944917, "t": 645.2540535627256, "r": 477.47479078330724, "b": 655.2166536177334, "coord_origin": "TOPLEFT"}}, {"id": 45, "text": "volutpat.", "bbox": {"l": 133.76801021944917, "t": 657.2090636287344, "r": 173.0565202839028, "b": 667.1716636837423, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "Duis autem vel eum iriure dolor in hendrerit in vulputate velit esse molestie consequat, vel illum dolore eu feugiat nulla facilisis at vero eros et accumsan et iusto odio dignissim qui blandit praesent luptatum zzril delenit augue duis dolore te feugait nulla facilisi. Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet dolore magna aliquam erat volutpat."}], "headers": [{"label": "page_footer", "id": 7, "page_no": 1, "cluster": {"id": 7, "label": "page_footer", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}, "confidence": 0.850279688835144, "cells": [{"id": 46, "text": "1", "bbox": {"l": 303.13300049729594, "t": 695.1640638383003, "r": 308.1142905054678, "b": 705.1266638933081, "coord_origin": "TOPLEFT"}}], "children": []}, "text": "1"}]}}] \ No newline at end of file diff --git a/tests/test_code_formula.py b/tests/test_code_formula.py new file mode 100644 index 0000000..05e8724 --- /dev/null +++ b/tests/test_code_formula.py @@ -0,0 +1,66 @@ +from pathlib import Path + +from docling_core.types.doc import CodeItem, TextItem +from docling_core.types.doc.labels import CodeLanguageLabel, DocItemLabel + +from docling.backend.docling_parse_backend import DoclingParseDocumentBackend +from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend +from docling.datamodel.base_models import InputFormat +from docling.datamodel.document import ConversionResult +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.document_converter import DocumentConverter, PdfFormatOption +from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline + + +def get_converter(): + + pipeline_options = PdfPipelineOptions() + pipeline_options.generate_page_images = True + + pipeline_options.do_ocr = False + pipeline_options.do_table_structure = False + pipeline_options.do_code_enrichment = True + pipeline_options.do_formula_enrichment = True + + converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption( + backend=DoclingParseV2DocumentBackend, + pipeline_cls=StandardPdfPipeline, + pipeline_options=pipeline_options, + ) + } + ) + + return converter + + +def test_code_and_formula_conversion(): + pdf_path = Path("tests/data/code_and_formula.pdf") + converter = get_converter() + + print(f"converting {pdf_path}") + + doc_result: ConversionResult = converter.convert(pdf_path) + + results = doc_result.document.texts + + code_blocks = [el for el in results if isinstance(el, CodeItem)] + assert len(code_blocks) == 1 + + gt = 'public static void print() {\n System.out.println("Java Code");\n}' + + predicted = code_blocks[0].text.strip() + assert predicted == gt, f"mismatch in text {predicted=}, {gt=}" + assert code_blocks[0].code_language == CodeLanguageLabel.JAVA + + formula_blocks = [ + el + for el in results + if isinstance(el, TextItem) and el.label == DocItemLabel.FORMULA + ] + assert len(formula_blocks) == 1 + + gt = "a ^ { 2 } + 8 = 1 2" + predicted = formula_blocks[0].text + assert predicted == gt, f"mismatch in text {predicted=}, {gt=}"