ci: add coverage and ruff (#1383)
* add coverage calculation and push
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* new codecov version and usage of token
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* enable ruff formatter instead of black and isort
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* apply ruff lint fixes
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* apply ruff unsafe fixes
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* add removed imports
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* runs 1 on linter issues
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* finalize linter fixes
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* Update pyproject.toml
  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -1,12 +1,7 @@
|
||||
import copy
|
||||
import random
|
||||
from pathlib import Path
|
||||
from typing import Dict, List
|
||||
|
||||
from docling_core.types.doc import (
|
||||
BoundingBox,
|
||||
CoordOrigin,
|
||||
DocItem,
|
||||
DocItemLabel,
|
||||
DoclingDocument,
|
||||
DocumentOrigin,
|
||||
@@ -17,13 +12,10 @@ from docling_core.types.doc import (
|
||||
TableData,
|
||||
)
|
||||
from docling_core.types.doc.document import ContentLayer
|
||||
from docling_core.types.legacy_doc.base import Ref
|
||||
from docling_core.types.legacy_doc.document import BaseText
|
||||
from docling_ibm_models.reading_order.reading_order_rb import (
|
||||
PageElement as ReadingOrderPageElement,
|
||||
ReadingOrderPredictor,
|
||||
)
|
||||
from docling_ibm_models.reading_order.reading_order_rb import ReadingOrderPredictor
|
||||
from PIL import ImageDraw
|
||||
from pydantic import BaseModel, ConfigDict
|
||||
|
||||
from docling.datamodel.base_models import (
|
||||
@@ -35,7 +27,6 @@ from docling.datamodel.base_models import (
|
||||
TextElement,
|
||||
)
|
||||
from docling.datamodel.document import ConversionResult
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||
|
||||
|
||||
@@ -53,12 +44,10 @@ class ReadingOrderModel:
|
||||
def _assembled_to_readingorder_elements(
|
||||
self, conv_res: ConversionResult
|
||||
) -> List[ReadingOrderPageElement]:
|
||||
|
||||
elements: List[ReadingOrderPageElement] = []
|
||||
page_no_to_pages = {p.page_no: p for p in conv_res.pages}
|
||||
|
||||
for element in conv_res.assembled.elements:
|
||||
|
||||
page_height = page_no_to_pages[element.page_no].size.height # type: ignore
|
||||
bbox = element.cluster.bbox.to_bottom_left_origin(page_height)
|
||||
text = element.text or ""
|
||||
@@ -84,7 +73,6 @@ class ReadingOrderModel:
|
||||
def _add_child_elements(
|
||||
self, element: BasePageElement, doc_item: NodeItem, doc: DoclingDocument
|
||||
):
|
||||
|
||||
child: Cluster
|
||||
for child in element.cluster.children:
|
||||
c_label = child.label
|
||||
@@ -110,7 +98,7 @@ class ReadingOrderModel:
|
||||
else:
|
||||
doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov)
|
||||
|
||||
def _readingorder_elements_to_docling_doc(
|
||||
def _readingorder_elements_to_docling_doc( # noqa: C901
|
||||
self,
|
||||
conv_res: ConversionResult,
|
||||
ro_elements: List[ReadingOrderPageElement],
|
||||
@@ -118,7 +106,6 @@ class ReadingOrderModel:
|
||||
el_to_footnotes_mapping: Dict[int, List[int]],
|
||||
el_merges_mapping: Dict[int, List[int]],
|
||||
) -> DoclingDocument:
|
||||
|
||||
id_to_elem = {
|
||||
RefItem(cref=f"#/{elem.page_no}/{elem.cluster.id}").cref: elem
|
||||
for elem in conv_res.assembled.elements
|
||||
@@ -192,7 +179,6 @@ class ReadingOrderModel:
|
||||
|
||||
code_item.footnotes.append(new_footnote_item.get_ref())
|
||||
else:
|
||||
|
||||
new_item, current_list = self._handle_text_element(
|
||||
element, out_doc, current_list, page_height
|
||||
)
|
||||
@@ -206,7 +192,6 @@ class ReadingOrderModel:
|
||||
)
|
||||
|
||||
elif isinstance(element, Table):
|
||||
|
||||
tbl_data = TableData(
|
||||
num_rows=element.num_rows,
|
||||
num_cols=element.num_cols,
|
||||
@@ -342,12 +327,12 @@ class ReadingOrderModel:
|
||||
return new_item, current_list
|
||||
|
||||
def _merge_elements(self, element, merged_elem, new_item, page_height):
|
||||
assert isinstance(
|
||||
merged_elem, type(element)
|
||||
), "Merged element must be of same type as element."
|
||||
assert (
|
||||
merged_elem.label == new_item.label
|
||||
), "Labels of merged elements must match."
|
||||
assert isinstance(merged_elem, type(element)), (
|
||||
"Merged element must be of same type as element."
|
||||
)
|
||||
assert merged_elem.label == new_item.label, (
|
||||
"Labels of merged elements must match."
|
||||
)
|
||||
prov = ProvenanceItem(
|
||||
page_no=element.page_no + 1,
|
||||
charspan=(
|
||||
|
||||
Reference in New Issue
Block a user