ci: add coverage and ruff (#1383)

* add coverage calculation and push

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* runs 1 on linter issues

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
2025-04-14 18:01:26 +02:00
parent 293c28ca7c
commit 5458a88464
104 changed files with 665 additions and 633 deletions
@@ -1,6 +1,7 @@
 import logging
 import re
-from typing import Iterable, List
+from collections.abc import Iterable
+from typing import List

 from pydantic import BaseModel

@@ -53,9 +54,9 @@ class PageAssembleModel(BasePageModel):
        sanitized_text = "".join(lines)

        # Text normalization
-        sanitized_text = sanitized_text.replace("⁄", "/")
-        sanitized_text = sanitized_text.replace("’", "'")
-        sanitized_text = sanitized_text.replace("‘", "'")
+        sanitized_text = sanitized_text.replace("⁄", "/")  # noqa: RUF001
+        sanitized_text = sanitized_text.replace("’", "'")  # noqa: RUF001
+        sanitized_text = sanitized_text.replace("‘", "'")  # noqa: RUF001
        sanitized_text = sanitized_text.replace("“", '"')
        sanitized_text = sanitized_text.replace("”", '"')
        sanitized_text = sanitized_text.replace("•", "·")
@@ -71,7 +72,6 @@ class PageAssembleModel(BasePageModel):
                yield page
            else:
                with TimeRecorder(conv_res, "page_assemble"):
-
                    assert page.predictions.layout is not None

                    # assembles some JSON output page by page.
@@ -83,7 +83,6 @@ class PageAssembleModel(BasePageModel):
                    for cluster in page.predictions.layout.clusters:
                        # _log.info("Cluster label seen:", cluster.label)
                        if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
-
                            textlines = [
                                cell.text.replace("\x02", "-").strip()
                                for cell in cluster.cells
@@ -109,9 +108,7 @@ class PageAssembleModel(BasePageModel):
                                tbl = page.predictions.tablestructure.table_map.get(
                                    cluster.id, None
                                )
-                            if (
-                                not tbl
-                            ):  # fallback: add table without structure, if it isn't present
+                            if not tbl:  # fallback: add table without structure, if it isn't present
                                tbl = Table(
                                    label=cluster.label,
                                    id=cluster.id,
@@ -130,9 +127,7 @@ class PageAssembleModel(BasePageModel):
                                fig = page.predictions.figures_classification.figure_map.get(
                                    cluster.id, None
                                )
-                            if (
-                                not fig
-                            ):  # fallback: add figure without classification, if it isn't present
+                            if not fig:  # fallback: add figure without classification, if it isn't present
                                fig = FigureElement(
                                    label=cluster.label,
                                    id=cluster.id,