ci: add coverage and ruff (#1383)

* add coverage calculation and push

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* first pass on linter issues

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Michele Dolfi
2025-04-14 18:01:26 +02:00
committed by GitHub
parent 293c28ca7c
commit 5458a88464
104 changed files with 665 additions and 633 deletions

View File

@@ -122,7 +122,6 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
@override
def convert(self) -> DoclingDocument:
if self.parser is not None:
doc = self.parser.parse(self.patent_content)
if doc is None:
@@ -163,7 +162,6 @@ class PatentUspto(ABC):
Returns:
The patent parsed as a docling document.
"""
pass
class PatentUsptoIce(PatentUspto):
@@ -265,7 +263,7 @@ class PatentUsptoIce(PatentUspto):
self.style_html = HtmlEntity()
@override
def startElement(self, tag, attributes): # noqa: N802
def startElement(self, tag, attributes):
"""Signal the start of an element.
Args:
@@ -281,7 +279,7 @@ class PatentUsptoIce(PatentUspto):
self._start_registered_elements(tag, attributes)
@override
def skippedEntity(self, name): # noqa: N802
def skippedEntity(self, name):
"""Receive notification of a skipped entity.
HTML entities will be skipped by the parser. This method will unescape them
@@ -315,7 +313,7 @@ class PatentUsptoIce(PatentUspto):
self.text += unescaped
@override
def endElement(self, tag): # noqa: N802
def endElement(self, tag):
"""Signal the end of an element.
Args:
@@ -603,7 +601,7 @@ class PatentUsptoGrantV2(PatentUspto):
self.style_html = HtmlEntity()
@override
def startElement(self, tag, attributes): # noqa: N802
def startElement(self, tag, attributes):
"""Signal the start of an element.
Args:
@@ -616,7 +614,7 @@ class PatentUsptoGrantV2(PatentUspto):
self._start_registered_elements(tag, attributes)
@override
def skippedEntity(self, name): # noqa: N802
def skippedEntity(self, name):
"""Receive notification of a skipped entity.
HTML entities will be skipped by the parser. This method will unescape them
@@ -650,7 +648,7 @@ class PatentUsptoGrantV2(PatentUspto):
self.text += unescaped
@override
def endElement(self, tag): # noqa: N802
def endElement(self, tag):
"""Signal the end of an element.
Args:
@@ -691,7 +689,7 @@ class PatentUsptoGrantV2(PatentUspto):
if tag in [member.value for member in self.Element]:
if (
tag == self.Element.HEADING.value
and not self.Element.SDOCL.value in self.property
and self.Element.SDOCL.value not in self.property
):
level_attr: str = attributes.get("LVL", "")
new_level: int = int(level_attr) if level_attr.isnumeric() else 1
@@ -743,7 +741,7 @@ class PatentUsptoGrantV2(PatentUspto):
# headers except claims statement
elif (
self.Element.HEADING.value in self.property
and not self.Element.SDOCL.value in self.property
and self.Element.SDOCL.value not in self.property
and text.strip()
):
self.parents[self.level + 1] = self.doc.add_heading(
@@ -1164,7 +1162,7 @@ class PatentUsptoAppV1(PatentUspto):
self.style_html = HtmlEntity()
@override
def startElement(self, tag, attributes): # noqa: N802
def startElement(self, tag, attributes):
"""Signal the start of an element.
Args:
@@ -1177,7 +1175,7 @@ class PatentUsptoAppV1(PatentUspto):
self._start_registered_elements(tag, attributes)
@override
def skippedEntity(self, name): # noqa: N802
def skippedEntity(self, name):
"""Receive notification of a skipped entity.
HTML entities will be skipped by the parser. This method will unescape them
@@ -1211,7 +1209,7 @@ class PatentUsptoAppV1(PatentUspto):
self.text += unescaped
@override
def endElement(self, tag): # noqa: N802
def endElement(self, tag):
"""Signal the end of an element.
Args:
@@ -1474,9 +1472,7 @@ class XmlTable:
if cw == 0:
offset_w0.append(col["offset"][ic])
min_colinfo["offset"] = sorted(
list(set(col["offset"] + min_colinfo["offset"]))
)
min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"]))
# add back the 0 width cols to offset list
offset_w0 = list(set(offset_w0))
@@ -1527,7 +1523,7 @@ class XmlTable:
return ncols_max
def _parse_table(self, table: Tag) -> TableData:
def _parse_table(self, table: Tag) -> TableData: # noqa: C901
"""Parse the content of a table tag.
Args:
@@ -1722,7 +1718,7 @@ class HtmlEntity:
"0": "&#8304;",
"+": "&#8314;",
"-": "&#8315;",
"−": "&#8315;",
"−": "&#8315;", # noqa: RUF001
"=": "&#8316;",
"(": "&#8317;",
")": "&#8318;",
@@ -1746,7 +1742,7 @@ class HtmlEntity:
"0": "&#8320;",
"+": "&#8330;",
"-": "&#8331;",
"−": "&#8331;",
"−": "&#8331;", # noqa: RUF001
"=": "&#8332;",
"(": "&#8333;",
")": "&#8334;",