ci: add coverage and ruff (#1383)
* add coverage calculation and push

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* new codecov version and usage of token

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enable ruff formatter instead of black and isort

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff lint fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply ruff unsafe fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add removed imports

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* runs 1 on linter issues

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* finalize linter fixes

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* Update pyproject.toml

  Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
  Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -122,7 +122,6 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
@override
|
||||
def convert(self) -> DoclingDocument:
|
||||
|
||||
if self.parser is not None:
|
||||
doc = self.parser.parse(self.patent_content)
|
||||
if doc is None:
|
||||
@@ -163,7 +162,6 @@ class PatentUspto(ABC):
|
||||
Returns:
|
||||
The patent parsed as a docling document.
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class PatentUsptoIce(PatentUspto):
|
||||
@@ -265,7 +263,7 @@ class PatentUsptoIce(PatentUspto):
|
||||
self.style_html = HtmlEntity()
|
||||
|
||||
@override
|
||||
def startElement(self, tag, attributes): # noqa: N802
|
||||
def startElement(self, tag, attributes):
|
||||
"""Signal the start of an element.
|
||||
|
||||
Args:
|
||||
@@ -281,7 +279,7 @@ class PatentUsptoIce(PatentUspto):
|
||||
self._start_registered_elements(tag, attributes)
|
||||
|
||||
@override
|
||||
def skippedEntity(self, name): # noqa: N802
|
||||
def skippedEntity(self, name):
|
||||
"""Receive notification of a skipped entity.
|
||||
|
||||
HTML entities will be skipped by the parser. This method will unescape them
|
||||
@@ -315,7 +313,7 @@ class PatentUsptoIce(PatentUspto):
|
||||
self.text += unescaped
|
||||
|
||||
@override
|
||||
def endElement(self, tag): # noqa: N802
|
||||
def endElement(self, tag):
|
||||
"""Signal the end of an element.
|
||||
|
||||
Args:
|
||||
@@ -603,7 +601,7 @@ class PatentUsptoGrantV2(PatentUspto):
|
||||
self.style_html = HtmlEntity()
|
||||
|
||||
@override
|
||||
def startElement(self, tag, attributes): # noqa: N802
|
||||
def startElement(self, tag, attributes):
|
||||
"""Signal the start of an element.
|
||||
|
||||
Args:
|
||||
@@ -616,7 +614,7 @@ class PatentUsptoGrantV2(PatentUspto):
|
||||
self._start_registered_elements(tag, attributes)
|
||||
|
||||
@override
|
||||
def skippedEntity(self, name): # noqa: N802
|
||||
def skippedEntity(self, name):
|
||||
"""Receive notification of a skipped entity.
|
||||
|
||||
HTML entities will be skipped by the parser. This method will unescape them
|
||||
@@ -650,7 +648,7 @@ class PatentUsptoGrantV2(PatentUspto):
|
||||
self.text += unescaped
|
||||
|
||||
@override
|
||||
def endElement(self, tag): # noqa: N802
|
||||
def endElement(self, tag):
|
||||
"""Signal the end of an element.
|
||||
|
||||
Args:
|
||||
@@ -691,7 +689,7 @@ class PatentUsptoGrantV2(PatentUspto):
|
||||
if tag in [member.value for member in self.Element]:
|
||||
if (
|
||||
tag == self.Element.HEADING.value
|
||||
and not self.Element.SDOCL.value in self.property
|
||||
and self.Element.SDOCL.value not in self.property
|
||||
):
|
||||
level_attr: str = attributes.get("LVL", "")
|
||||
new_level: int = int(level_attr) if level_attr.isnumeric() else 1
|
||||
@@ -743,7 +741,7 @@ class PatentUsptoGrantV2(PatentUspto):
|
||||
# headers except claims statement
|
||||
elif (
|
||||
self.Element.HEADING.value in self.property
|
||||
and not self.Element.SDOCL.value in self.property
|
||||
and self.Element.SDOCL.value not in self.property
|
||||
and text.strip()
|
||||
):
|
||||
self.parents[self.level + 1] = self.doc.add_heading(
|
||||
@@ -1164,7 +1162,7 @@ class PatentUsptoAppV1(PatentUspto):
|
||||
self.style_html = HtmlEntity()
|
||||
|
||||
@override
|
||||
def startElement(self, tag, attributes): # noqa: N802
|
||||
def startElement(self, tag, attributes):
|
||||
"""Signal the start of an element.
|
||||
|
||||
Args:
|
||||
@@ -1177,7 +1175,7 @@ class PatentUsptoAppV1(PatentUspto):
|
||||
self._start_registered_elements(tag, attributes)
|
||||
|
||||
@override
|
||||
def skippedEntity(self, name): # noqa: N802
|
||||
def skippedEntity(self, name):
|
||||
"""Receive notification of a skipped entity.
|
||||
|
||||
HTML entities will be skipped by the parser. This method will unescape them
|
||||
@@ -1211,7 +1209,7 @@ class PatentUsptoAppV1(PatentUspto):
|
||||
self.text += unescaped
|
||||
|
||||
@override
|
||||
def endElement(self, tag): # noqa: N802
|
||||
def endElement(self, tag):
|
||||
"""Signal the end of an element.
|
||||
|
||||
Args:
|
||||
@@ -1474,9 +1472,7 @@ class XmlTable:
|
||||
if cw == 0:
|
||||
offset_w0.append(col["offset"][ic])
|
||||
|
||||
min_colinfo["offset"] = sorted(
|
||||
list(set(col["offset"] + min_colinfo["offset"]))
|
||||
)
|
||||
min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"]))
|
||||
|
||||
# add back the 0 width cols to offset list
|
||||
offset_w0 = list(set(offset_w0))
|
||||
@@ -1527,7 +1523,7 @@ class XmlTable:
|
||||
|
||||
return ncols_max
|
||||
|
||||
def _parse_table(self, table: Tag) -> TableData:
|
||||
def _parse_table(self, table: Tag) -> TableData: # noqa: C901
|
||||
"""Parse the content of a table tag.
|
||||
|
||||
Args:
|
||||
@@ -1722,7 +1718,7 @@ class HtmlEntity:
|
||||
"0": "⁰",
|
||||
"+": "⁺",
|
||||
"-": "⁻",
|
||||
"−": "⁻",
|
||||
"−": "⁻", # noqa: RUF001
|
||||
"=": "⁼",
|
||||
"(": "⁽",
|
||||
")": "⁾",
|
||||
@@ -1746,7 +1742,7 @@ class HtmlEntity:
|
||||
"0": "₀",
|
||||
"+": "₊",
|
||||
"-": "₋",
|
||||
"−": "₋",
|
||||
"−": "₋", # noqa: RUF001
|
||||
"=": "₌",
|
||||
"(": "₍",
|
||||
")": "₎",
|
||||
|
||||
Reference in New Issue
Block a user