ci: add coverage and ruff (#1383)
* add coverage calculation and push

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* runs 1 on linter issues

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import re
|
||||
from typing import Iterable, List
|
||||
from collections.abc import Iterable
|
||||
from typing import List
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
@@ -53,9 +54,9 @@ class PageAssembleModel(BasePageModel):
|
||||
sanitized_text = "".join(lines)
|
||||
|
||||
# Text normalization
|
||||
sanitized_text = sanitized_text.replace("⁄", "/")
|
||||
sanitized_text = sanitized_text.replace("’", "'")
|
||||
sanitized_text = sanitized_text.replace("‘", "'")
|
||||
sanitized_text = sanitized_text.replace("⁄", "/") # noqa: RUF001
|
||||
sanitized_text = sanitized_text.replace("’", "'") # noqa: RUF001
|
||||
sanitized_text = sanitized_text.replace("‘", "'") # noqa: RUF001
|
||||
sanitized_text = sanitized_text.replace("“", '"')
|
||||
sanitized_text = sanitized_text.replace("”", '"')
|
||||
sanitized_text = sanitized_text.replace("•", "·")
|
||||
@@ -71,7 +72,6 @@ class PageAssembleModel(BasePageModel):
|
||||
yield page
|
||||
else:
|
||||
with TimeRecorder(conv_res, "page_assemble"):
|
||||
|
||||
assert page.predictions.layout is not None
|
||||
|
||||
# assembles some JSON output page by page.
|
||||
@@ -83,7 +83,6 @@ class PageAssembleModel(BasePageModel):
|
||||
for cluster in page.predictions.layout.clusters:
|
||||
# _log.info("Cluster label seen:", cluster.label)
|
||||
if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
|
||||
|
||||
textlines = [
|
||||
cell.text.replace("\x02", "-").strip()
|
||||
for cell in cluster.cells
|
||||
@@ -109,9 +108,7 @@ class PageAssembleModel(BasePageModel):
|
||||
tbl = page.predictions.tablestructure.table_map.get(
|
||||
cluster.id, None
|
||||
)
|
||||
if (
|
||||
not tbl
|
||||
): # fallback: add table without structure, if it isn't present
|
||||
if not tbl: # fallback: add table without structure, if it isn't present
|
||||
tbl = Table(
|
||||
label=cluster.label,
|
||||
id=cluster.id,
|
||||
@@ -130,9 +127,7 @@ class PageAssembleModel(BasePageModel):
|
||||
fig = page.predictions.figures_classification.figure_map.get(
|
||||
cluster.id, None
|
||||
)
|
||||
if (
|
||||
not fig
|
||||
): # fallback: add figure without classification, if it isn't present
|
||||
if not fig: # fallback: add figure without classification, if it isn't present
|
||||
fig = FigureElement(
|
||||
label=cluster.label,
|
||||
id=cluster.id,
|
||||
|
||||
Reference in New Issue
Block a user