diff --git a/.github/codecov.yml b/.github/codecov.yml new file mode 100644 index 0000000..944d4a0 --- /dev/null +++ b/.github/codecov.yml @@ -0,0 +1,17 @@ +codecov: + # https://docs.codecov.io/docs/comparing-commits + allow_coverage_offsets: true +coverage: + status: + project: + default: + informational: true + target: auto # auto compares coverage to the previous base commit + flags: + - docling + comment: + layout: "reach, diff, flags, files" + behavior: default + require_changes: false # if true: only post the comment if coverage changes + branches: # branch names that can post comment + - "main" diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 1f0502d..f569130 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -10,6 +10,8 @@ env: jobs: code-checks: uses: ./.github/workflows/checks.yml + with: + push_coverage: false pre-release-check: runs-on: ubuntu-latest outputs: diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index ee5ba79..8b93a56 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -1,5 +1,13 @@ on: workflow_call: + inputs: + push_coverage: + type: boolean + description: "If true, the coverage results are pushed to codecov.io." + default: true + secrets: + CODECOV_TOKEN: + required: false env: HF_HUB_DOWNLOAD_TIMEOUT: "60" @@ -32,7 +40,13 @@ jobs: run: poetry install --all-extras - name: Testing run: | - poetry run pytest -v tests + poetry run pytest -v --cov=docling --cov-report=xml tests + - name: Upload coverage to Codecov + if: inputs.push_coverage + uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + file: ./coverage.xml - name: Run examples run: | for file in docs/examples/*.py; do diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0bf45ce..e2cf18a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,3 +17,5 @@ jobs: code-checks: if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'docling-project/docling' && github.event.pull_request.head.repo.full_name != 'docling-project/docling') }} uses: ./.github/workflows/checks.yml + secrets: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 19bb27c..041a100 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,43 +1,26 @@ fail_fast: true repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.5 + hooks: + # Run the Ruff formatter. + - id: ruff-format + name: "Ruff formatter" + args: [--config=pyproject.toml] + files: '^(docling|tests|docs/examples).*\.(py|ipynb)$' + # Run the Ruff linter. 
+ - id: ruff + name: "Ruff linter" + args: [--exit-non-zero-on-fix, --fix, --config=pyproject.toml] + files: '^(docling|tests|docs/examples).*\.(py|ipynb)$' - repo: local hooks: - - id: black - name: Black - entry: poetry run black docling docs/examples tests - pass_filenames: false - language: system - files: '\.py$' - - id: isort - name: isort - entry: poetry run isort docling docs/examples tests - pass_filenames: false - language: system - files: '\.py$' -# - id: flake8 -# name: flake8 -# entry: poetry run flake8 docling -# pass_filenames: false -# language: system -# files: '\.py$' - id: mypy name: MyPy entry: poetry run mypy docling pass_filenames: false language: system files: '\.py$' - - id: nbqa_black - name: nbQA Black - entry: poetry run nbqa black docs/examples - pass_filenames: false - language: system - files: '\.ipynb$' - - id: nbqa_isort - name: nbQA isort - entry: poetry run nbqa isort docs/examples - pass_filenames: false - language: system - files: '\.ipynb$' - id: poetry name: Poetry check entry: poetry check --lock diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py index 09891eb..3c41810 100644 --- a/docling/backend/asciidoc_backend.py +++ b/docling/backend/asciidoc_backend.py @@ -34,7 +34,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend): text_stream = self.path_or_stream.getvalue().decode("utf-8") self.lines = text_stream.split("\n") if isinstance(self.path_or_stream, Path): - with open(self.path_or_stream, "r", encoding="utf-8") as f: + with open(self.path_or_stream, encoding="utf-8") as f: self.lines = f.readlines() self.valid = True @@ -75,14 +75,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return doc - def _parse(self, doc: DoclingDocument): + def _parse(self, doc: DoclingDocument): # noqa: C901 """ Main function that orchestrates the parsing by yielding components: title, section headers, text, lists, and tables. 
""" - content = "" - in_list = False in_table = False @@ -95,7 +93,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend): # indents: dict[int, Union[DocItem, GroupItem, None]] = {} indents: dict[int, Union[GroupItem, None]] = {} - for i in range(0, 10): + for i in range(10): parents[i] = None indents[i] = None @@ -125,7 +123,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): # Lists elif self._is_list_item(line): - _log.debug(f"line: {line}") item = self._parse_list_item(line) _log.debug(f"parsed list-item: {item}") @@ -147,7 +144,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): indents[level + 1] = item["indent"] elif in_list and item["indent"] < indents[level]: - # print(item["indent"], " => ", indents[level]) while item["indent"] < indents[level]: # print(item["indent"], " => ", indents[level]) @@ -176,7 +172,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): elif in_table and ( (not self._is_table_line(line)) or line.strip() == "|===" ): # end of table - caption = None if len(caption_data) > 0: caption = doc.add_text( @@ -195,7 +190,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): # Picture elif self._is_picture(line): - caption = None if len(caption_data) > 0: caption = doc.add_text( @@ -250,7 +244,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): text_data = [] elif len(line.strip()) > 0: # allow multiline texts - item = self._parse_text(line) text_data.append(item["text"]) @@ -273,14 +266,14 @@ class AsciiDocBackend(DeclarativeDocumentBackend): def _get_current_level(self, parents): for k, v in parents.items(): - if v == None and k > 0: + if v is None and k > 0: return k - 1 return 0 def _get_current_parent(self, parents): for k, v in parents.items(): - if v == None and k > 0: + if v is None and k > 0: return parents[k - 1] return None @@ -328,7 +321,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend): "marker": marker, "text": text.strip(), "numbered": False, - "indent": 0 if indent == None else len(indent), + "indent": 0 if indent is None else len(indent), } else: return { @@ -336,7 +329,7 @@ class AsciiDocBackend(DeclarativeDocumentBackend): "marker": marker, "text": text.strip(), "numbered": True, - "indent": 0 if indent == None else len(indent), + "indent": 0 if indent is None else len(indent), } else: # Fallback if no match @@ -357,7 +350,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return [cell.strip() for cell in line.split("|") if cell.strip()] def _populate_table_as_grid(self, table_data): - num_rows = len(table_data) # Adjust the table data into a grid format diff --git a/docling/backend/csv_backend.py b/docling/backend/csv_backend.py index 9159bd4..94d37d0 100644 --- a/docling/backend/csv_backend.py +++ b/docling/backend/csv_backend.py @@ -58,7 +58,7 @@ class CsvDocumentBackend(DeclarativeDocumentBackend): head = self.content.readline() dialect = csv.Sniffer().sniff(head, ",;\t|:") _log.info(f'Parsing CSV with delimiter: "{dialect.delimiter}"') - if not dialect.delimiter in {",", ";", "\t", "|", ":"}: + if dialect.delimiter not in {",", ";", "\t", "|", ":"}: raise RuntimeError( f"Cannot convert csv with unknown delimiter {dialect.delimiter}." 
) diff --git a/docling/backend/docling_parse_backend.py b/docling/backend/docling_parse_backend.py index 533ed42..33e7792 100644 --- a/docling/backend/docling_parse_backend.py +++ b/docling/backend/docling_parse_backend.py @@ -1,8 +1,9 @@ import logging import random +from collections.abc import Iterable from io import BytesIO from pathlib import Path -from typing import Iterable, List, Optional, Union +from typing import List, Optional, Union import pypdfium2 as pdfium from docling_core.types.doc import BoundingBox, CoordOrigin, Size @@ -156,7 +157,6 @@ class DoclingParsePageBackend(PdfPageBackend): def get_page_image( self, scale: float = 1, cropbox: Optional[BoundingBox] = None ) -> Image.Image: - page_size = self.get_size() if not cropbox: diff --git a/docling/backend/docling_parse_v2_backend.py b/docling/backend/docling_parse_v2_backend.py index f7475aa..6c12b66 100644 --- a/docling/backend/docling_parse_v2_backend.py +++ b/docling/backend/docling_parse_v2_backend.py @@ -1,8 +1,9 @@ import logging import random +from collections.abc import Iterable from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import pypdfium2 as pdfium from docling_core.types.doc import BoundingBox, CoordOrigin @@ -172,7 +173,6 @@ class DoclingParseV2PageBackend(PdfPageBackend): def get_page_image( self, scale: float = 1, cropbox: Optional[BoundingBox] = None ) -> Image.Image: - page_size = self.get_size() if not cropbox: diff --git a/docling/backend/docling_parse_v4_backend.py b/docling/backend/docling_parse_v4_backend.py index e1e7430..3e59f12 100644 --- a/docling/backend/docling_parse_v4_backend.py +++ b/docling/backend/docling_parse_v4_backend.py @@ -1,14 +1,14 @@ import logging -import random +from collections.abc import Iterable from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, Optional, Union import pypdfium2 as pdfium from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import SegmentedPdfPage, TextCell from docling_parse.pdf_parser import DoclingPdfParser, PdfDocument -from PIL import Image, ImageDraw +from PIL import Image from pypdfium2 import PdfPage from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend @@ -93,7 +93,6 @@ class DoclingParseV4PageBackend(PdfPageBackend): def get_page_image( self, scale: float = 1, cropbox: Optional[BoundingBox] = None ) -> Image.Image: - page_size = self.get_size() if not cropbox: diff --git a/docling/backend/docx/latex/latex_dict.py b/docling/backend/docx/latex/latex_dict.py index 0323478..1348647 100644 --- a/docling/backend/docx/latex/latex_dict.py +++ b/docling/backend/docx/latex/latex_dict.py @@ -1,12 +1,8 @@ -# -*- coding: utf-8 -*- - """ Adapted from https://github.com/xiilei/dwml/blob/master/dwml/latex_dict.py On 23/01/2025 """ -from __future__ import unicode_literals - CHARS = ("{", "}", "_", "^", "#", "&", "$", "%", "~") BLANK = "" @@ -79,7 +75,6 @@ CHR_BO = { } T = { - "\u2192": "\\rightarrow ", # Greek letters "\U0001d6fc": "\\alpha ", "\U0001d6fd": "\\beta ", diff --git a/docling/backend/docx/latex/omml.py b/docling/backend/docx/latex/omml.py index b2d5f90..f927885 100644 --- a/docling/backend/docx/latex/omml.py +++ b/docling/backend/docx/latex/omml.py @@ -76,8 +76,7 @@ def get_val(key, default=None, store=CHR): return default -class Tag2Method(object): - +class Tag2Method: def 
call_method(self, elm, stag=None): getmethod = self.tag2meth.get if stag is None: @@ -130,7 +129,6 @@ class Tag2Method(object): class Pr(Tag2Method): - text = "" __val_tags = ("chr", "pos", "begChr", "endChr", "type") @@ -159,7 +157,7 @@ class Pr(Tag2Method): def do_common(self, elm): stag = elm.tag.replace(OMML_NS, "") if stag in self.__val_tags: - t = elm.get("{0}val".format(OMML_NS)) + t = elm.get(f"{OMML_NS}val") self.__innerdict[stag] = t return None @@ -248,7 +246,6 @@ class oMath2Latex(Tag2Method): """ the Pre-Sub-Superscript object -- Not support yet """ - pass def do_sub(self, elm): text = self.process_children(elm) @@ -331,7 +328,7 @@ class oMath2Latex(Tag2Method): t_dict = self.process_children_dict(elm, include=("e", "lim")) latex_s = LIM_FUNC.get(t_dict["e"]) if not latex_s: - raise NotSupport("Not support lim %s" % t_dict["e"]) + raise RuntimeError("Not support lim {}".format(t_dict["e"])) else: return latex_s.format(lim=t_dict.get("lim")) @@ -413,7 +410,7 @@ class oMath2Latex(Tag2Method): """ _str = [] _base_str = [] - found_text = elm.findtext("./{0}t".format(OMML_NS)) + found_text = elm.findtext(f"./{OMML_NS}t") if found_text: for s in found_text: out_latex_str = self.process_unicode(s) diff --git a/docling/backend/html_backend.py b/docling/backend/html_backend.py index 5889429..aa2637f 100644 --- a/docling/backend/html_backend.py +++ b/docling/backend/html_backend.py @@ -55,7 +55,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): self.max_levels = 10 self.level = 0 self.parents: dict[int, Optional[Union[DocItem, GroupItem]]] = {} - for i in range(0, self.max_levels): + for i in range(self.max_levels): self.parents[i] = None try: @@ -126,7 +126,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): return doc def walk(self, tag: Tag, doc: DoclingDocument) -> None: - # Iterate over elements in the body of the document text: str = "" for element in tag.children: @@ -135,7 +134,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): self.analyze_tag(cast(Tag, element), doc) except Exception as exc_child: _log.error( - f"Error processing child from tag {tag.name}: {repr(exc_child)}" + f"Error processing child from tag {tag.name}: {exc_child!r}" ) raise exc_child elif isinstance(element, NavigableString) and not isinstance( @@ -147,7 +146,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): item for item in element.next_siblings if isinstance(item, Tag) ] if element.next_sibling is None or any( - [item.name in TAGS_FOR_NODE_ITEMS for item in siblings] + item.name in TAGS_FOR_NODE_ITEMS for item in siblings ): text = text.strip() if text and tag.name in ["div"]: @@ -222,7 +221,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): ) else: if hlevel > self.level: - # add invisible group for i in range(self.level + 1, hlevel): self.parents[i] = doc.add_group( @@ -234,7 +232,6 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): self.level = hlevel elif hlevel < self.level: - # remove the tail for key in self.parents.keys(): if key > hlevel: @@ -360,7 +357,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend): marker = "" enumerated = False if parent_label == GroupLabel.ORDERED_LIST: - marker = f"{str(index_in_list)}." + marker = f"{index_in_list!s}." 
enumerated = True doc.add_list_item( text=text, diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index f83dd2d..0c6b306 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -83,7 +83,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): # otherwise they represent emphasis (bold or italic) self.markdown = self._shorten_underscore_sequences(text_stream) if isinstance(self.path_or_stream, Path): - with open(self.path_or_stream, "r", encoding="utf-8") as f: + with open(self.path_or_stream, encoding="utf-8") as f: md_content = f.read() # remove invalid sequences # very long sequences of underscores will lead to unnecessary long processing times. @@ -168,7 +168,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): ) self.inline_texts = [] - def _iterate_elements( + def _iterate_elements( # noqa: C901 self, element: marko.element.Element, depth: int, @@ -176,7 +176,6 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): visited: Set[marko.element.Element], parent_item: Optional[NodeItem] = None, ): - if element in visited: return @@ -236,7 +235,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): if has_non_empty_list_items: label = GroupLabel.ORDERED_LIST if element.ordered else GroupLabel.LIST parent_item = doc.add_group( - label=label, name=f"list", parent=parent_item + label=label, name="list", parent=parent_item ) elif ( @@ -320,7 +319,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): self._html_blocks += 1 self._process_inline_text(parent_item, doc) self._close_table(doc) - _log.debug("HTML Block: {}".format(element)) + _log.debug(f"HTML Block: {element}") if ( len(element.body) > 0 ): # If Marko doesn't return any content for HTML block, skip it @@ -332,7 +331,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): else: if not isinstance(element, str): self._close_table(doc) - _log.debug("Some other element: {}".format(element)) + _log.debug(f"Some other element: {element}") processed_block_types = ( marko.block.Heading, @@ -398,7 +397,6 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): # if HTML blocks were detected, export to HTML and delegate to HTML backend if self._html_blocks > 0: - # export to HTML html_backend_cls = HTMLDocumentBackend html_str = doc.export_to_html() diff --git a/docling/backend/msexcel_backend.py b/docling/backend/msexcel_backend.py index 971b93c..0ae9083 100644 --- a/docling/backend/msexcel_backend.py +++ b/docling/backend/msexcel_backend.py @@ -184,7 +184,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken """ if self.workbook is not None: - # Iterate over all sheets for sheet_name in self.workbook.sheetnames: _log.info(f"Processing sheet: {sheet_name}") @@ -253,7 +252,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken ) for excel_cell in excel_table.data: - cell = TableCell( text=excel_cell.text, row_span=excel_cell.row_span, @@ -303,7 +301,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken # Iterate over all cells in the sheet for ri, row in enumerate(sheet.iter_rows(values_only=False)): for rj, cell in enumerate(row): - # Skip empty or already visited cells if cell.value is None or (ri, rj) in visited: continue @@ -342,7 +339,6 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken visited_cells: set[tuple[int, int]] = set() for ri in range(start_row, max_row + 1): for rj in range(start_col, max_col + 
1): - cell = sheet.cell(row=ri + 1, column=rj + 1) # 1-based indexing # Check if the cell belongs to a merged range @@ -350,14 +346,12 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken col_span = 1 for merged_range in sheet.merged_cells.ranges: - if ( merged_range.min_row <= ri + 1 and ri + 1 <= merged_range.max_row and merged_range.min_col <= rj + 1 and rj + 1 <= merged_range.max_col ): - row_span = merged_range.max_row - merged_range.min_row + 1 col_span = merged_range.max_col - merged_range.min_col + 1 break @@ -499,7 +493,7 @@ class MsExcelDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentBacken ), ), ) - except: + except Exception: _log.error("could not extract the image from excel sheets") return doc diff --git a/docling/backend/mspowerpoint_backend.py b/docling/backend/mspowerpoint_backend.py index 2de0da1..3b9a6bb 100644 --- a/docling/backend/mspowerpoint_backend.py +++ b/docling/backend/mspowerpoint_backend.py @@ -120,13 +120,12 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB return prov - def handle_text_elements(self, shape, parent_slide, slide_ind, doc, slide_size): + def handle_text_elements(self, shape, parent_slide, slide_ind, doc, slide_size): # noqa: C901 is_a_list = False is_list_group_created = False enum_list_item_value = 0 new_list = None bullet_type = "None" - list_text = "" list_label = GroupLabel.LIST doc_label = DocItemLabel.LIST_ITEM prov = self.generate_prov(shape, slide_ind, shape.text.strip(), slide_size) @@ -243,7 +242,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB enum_marker = str(enum_list_item_value) + "." if not is_list_group_created: new_list = doc.add_group( - label=list_label, name=f"list", parent=parent_slide + label=list_label, name="list", parent=parent_slide ) is_list_group_created = True doc.add_list_item( @@ -368,11 +367,9 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB slide_width = pptx_obj.slide_width slide_height = pptx_obj.slide_height - text_content = [] # type: ignore - max_levels = 10 parents = {} # type: ignore - for i in range(0, max_levels): + for i in range(max_levels): parents[i] = None # Loop through each slide @@ -383,7 +380,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB ) slide_size = Size(width=slide_width, height=slide_height) - parent_page = doc.add_page(page_no=slide_ind + 1, size=slide_size) + doc.add_page(page_no=slide_ind + 1, size=slide_size) def handle_shapes(shape, parent_slide, slide_ind, doc, slide_size): handle_groups(shape, parent_slide, slide_ind, doc, slide_size) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 5915c0a..a108361 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -158,7 +158,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): def _get_level(self) -> int: """Return the first None index.""" for k, v in self.parents.items(): - if k >= 0 and v == None: + if k >= 0 and v is None: return k return 0 @@ -418,7 +418,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): else prev_parent ) - def _handle_text_elements( + def _handle_text_elements( # noqa: C901 self, element: BaseOxmlElement, docx_obj: DocxDocument, @@ -812,7 +812,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): f" col {col_idx} grid_span {cell.grid_span} grid_cols_before {row.grid_cols_before}" ) if cell is None or cell._tc in cell_set: - 
_log.debug(f" skipped since repeated content") + _log.debug(" skipped since repeated content") col_idx += cell.grid_span continue else: @@ -879,7 +879,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): image=ImageRef.from_pil(image=pil_image, dpi=72), caption=None, ) - except (UnidentifiedImageError, OSError) as e: + except (UnidentifiedImageError, OSError): _log.warning("Warning: image cannot be loaded by Pillow") doc.add_picture( parent=self.parents[level - 1], diff --git a/docling/backend/pdf_backend.py b/docling/backend/pdf_backend.py index cfecc7e..3d07578 100644 --- a/docling/backend/pdf_backend.py +++ b/docling/backend/pdf_backend.py @@ -1,7 +1,8 @@ from abc import ABC, abstractmethod +from collections.abc import Iterable from io import BytesIO from pathlib import Path -from typing import Iterable, Optional, Set, Union +from typing import Optional, Set, Union from docling_core.types.doc import BoundingBox, Size from docling_core.types.doc.page import SegmentedPdfPage, TextCell diff --git a/docling/backend/pypdfium2_backend.py b/docling/backend/pypdfium2_backend.py index 0fce0f8..67e1f05 100644 --- a/docling/backend/pypdfium2_backend.py +++ b/docling/backend/pypdfium2_backend.py @@ -1,8 +1,9 @@ import logging import random +from collections.abc import Iterable from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Iterable, List, Optional, Union +from typing import TYPE_CHECKING, List, Optional, Union import pypdfium2 as pdfium import pypdfium2.raw as pdfium_c @@ -29,7 +30,7 @@ class PyPdfiumPageBackend(PdfPageBackend): self.valid = True # No better way to tell from pypdfium. try: self._ppage: pdfium.PdfPage = pdfium_doc[page_no] - except PdfiumError as e: + except PdfiumError: _log.info( f"An exception occurred when loading page {page_no} of document {document_hash}.", exc_info=True, @@ -225,7 +226,6 @@ class PyPdfiumPageBackend(PdfPageBackend): def get_page_image( self, scale: float = 1, cropbox: Optional[BoundingBox] = None ) -> Image.Image: - page_size = self.get_size() if not cropbox: diff --git a/docling/backend/xml/jats_backend.py b/docling/backend/xml/jats_backend.py index 2409961..23560d3 100755 --- a/docling/backend/xml/jats_backend.py +++ b/docling/backend/xml/jats_backend.py @@ -102,13 +102,13 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): doc_info: etree.DocInfo = self.tree.docinfo if doc_info.system_url and any( - [kwd in doc_info.system_url for kwd in JATS_DTD_URL] + kwd in doc_info.system_url for kwd in JATS_DTD_URL ): self.valid = True return for ent in doc_info.internalDTD.iterentities(): if ent.system_url and any( - [kwd in ent.system_url for kwd in JATS_DTD_URL] + kwd in ent.system_url for kwd in JATS_DTD_URL ): self.valid = True return @@ -232,10 +232,9 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): # TODO: once superscript is supported, add label with formatting aff = aff.removeprefix(f"{label[0].text}, ") affiliation_names.append(aff) - affiliation_ids_names = { - id: name - for id, name in zip(meta.xpath(".//aff[@id]/@id"), affiliation_names) - } + affiliation_ids_names = dict( + zip(meta.xpath(".//aff[@id]/@id"), affiliation_names) + ) # Get author names and affiliation names for author_node in meta.xpath( @@ -300,7 +299,6 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): def _add_abstract( self, doc: DoclingDocument, xml_components: XMLComponents ) -> None: - for abstract in xml_components["abstract"]: text: str = abstract["content"] title: str = abstract["label"] or DEFAULT_HEADER_ABSTRACT 
@@ -349,7 +347,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): return - def _parse_element_citation(self, node: etree._Element) -> str: + def _parse_element_citation(self, node: etree._Element) -> str: # noqa: C901 citation: Citation = { "author_names": "", "title": "", @@ -440,7 +438,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): citation["page"] = node.xpath("fpage")[0].text.replace("\n", " ").strip() if len(node.xpath("lpage")) > 0: citation["page"] += ( - "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip() + "–" + node.xpath("lpage")[0].text.replace("\n", " ").strip() # noqa: RUF001 ) # Flatten the citation to string @@ -595,9 +593,8 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): try: self._add_table(doc, parent, table) - except Exception as e: - _log.warning(f"Skipping unsupported table in {str(self.file)}") - pass + except Exception: + _log.warning(f"Skipping unsupported table in {self.file!s}") return @@ -609,7 +606,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend): ) return - def _walk_linear( + def _walk_linear( # noqa: C901 self, doc: DoclingDocument, parent: NodeItem, node: etree._Element ) -> str: skip_tags = ["term"] diff --git a/docling/backend/xml/uspto_backend.py b/docling/backend/xml/uspto_backend.py index f3fb1ca..b0f8031 100644 --- a/docling/backend/xml/uspto_backend.py +++ b/docling/backend/xml/uspto_backend.py @@ -122,7 +122,6 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend): @override def convert(self) -> DoclingDocument: - if self.parser is not None: doc = self.parser.parse(self.patent_content) if doc is None: @@ -163,7 +162,6 @@ class PatentUspto(ABC): Returns: The patent parsed as a docling document. """ - pass class PatentUsptoIce(PatentUspto): @@ -265,7 +263,7 @@ class PatentUsptoIce(PatentUspto): self.style_html = HtmlEntity() @override - def startElement(self, tag, attributes): # noqa: N802 + def startElement(self, tag, attributes): """Signal the start of an element. Args: @@ -281,7 +279,7 @@ class PatentUsptoIce(PatentUspto): self._start_registered_elements(tag, attributes) @override - def skippedEntity(self, name): # noqa: N802 + def skippedEntity(self, name): """Receive notification of a skipped entity. HTML entities will be skipped by the parser. This method will unescape them @@ -315,7 +313,7 @@ class PatentUsptoIce(PatentUspto): self.text += unescaped @override - def endElement(self, tag): # noqa: N802 + def endElement(self, tag): """Signal the end of an element. Args: @@ -603,7 +601,7 @@ class PatentUsptoGrantV2(PatentUspto): self.style_html = HtmlEntity() @override - def startElement(self, tag, attributes): # noqa: N802 + def startElement(self, tag, attributes): """Signal the start of an element. Args: @@ -616,7 +614,7 @@ class PatentUsptoGrantV2(PatentUspto): self._start_registered_elements(tag, attributes) @override - def skippedEntity(self, name): # noqa: N802 + def skippedEntity(self, name): """Receive notification of a skipped entity. HTML entities will be skipped by the parser. This method will unescape them @@ -650,7 +648,7 @@ class PatentUsptoGrantV2(PatentUspto): self.text += unescaped @override - def endElement(self, tag): # noqa: N802 + def endElement(self, tag): """Signal the end of an element. 
Args: @@ -691,7 +689,7 @@ class PatentUsptoGrantV2(PatentUspto): if tag in [member.value for member in self.Element]: if ( tag == self.Element.HEADING.value - and not self.Element.SDOCL.value in self.property + and self.Element.SDOCL.value not in self.property ): level_attr: str = attributes.get("LVL", "") new_level: int = int(level_attr) if level_attr.isnumeric() else 1 @@ -743,7 +741,7 @@ class PatentUsptoGrantV2(PatentUspto): # headers except claims statement elif ( self.Element.HEADING.value in self.property - and not self.Element.SDOCL.value in self.property + and self.Element.SDOCL.value not in self.property and text.strip() ): self.parents[self.level + 1] = self.doc.add_heading( @@ -1164,7 +1162,7 @@ class PatentUsptoAppV1(PatentUspto): self.style_html = HtmlEntity() @override - def startElement(self, tag, attributes): # noqa: N802 + def startElement(self, tag, attributes): """Signal the start of an element. Args: @@ -1177,7 +1175,7 @@ class PatentUsptoAppV1(PatentUspto): self._start_registered_elements(tag, attributes) @override - def skippedEntity(self, name): # noqa: N802 + def skippedEntity(self, name): """Receive notification of a skipped entity. HTML entities will be skipped by the parser. This method will unescape them @@ -1211,7 +1209,7 @@ class PatentUsptoAppV1(PatentUspto): self.text += unescaped @override - def endElement(self, tag): # noqa: N802 + def endElement(self, tag): """Signal the end of an element. Args: @@ -1474,9 +1472,7 @@ class XmlTable: if cw == 0: offset_w0.append(col["offset"][ic]) - min_colinfo["offset"] = sorted( - list(set(col["offset"] + min_colinfo["offset"])) - ) + min_colinfo["offset"] = sorted(set(col["offset"] + min_colinfo["offset"])) # add back the 0 width cols to offset list offset_w0 = list(set(offset_w0)) @@ -1527,7 +1523,7 @@ class XmlTable: return ncols_max - def _parse_table(self, table: Tag) -> TableData: + def _parse_table(self, table: Tag) -> TableData: # noqa: C901 """Parse the content of a table tag. 
Args: @@ -1722,7 +1718,7 @@ class HtmlEntity: "0": "⁰", "+": "⁺", "-": "⁻", - "−": "⁻", + "−": "⁻", # noqa: RUF001 "=": "⁼", "(": "⁽", ")": "⁾", @@ -1746,7 +1742,7 @@ class HtmlEntity: "0": "₀", "+": "₊", "-": "₋", - "−": "₋", + "−": "₋", # noqa: RUF001 "=": "₌", "(": "₍", ")": "₎", diff --git a/docling/cli/main.py b/docling/cli/main.py index 6830c7f..c0718c8 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -6,14 +6,16 @@ import sys import tempfile import time import warnings +from collections.abc import Iterable from pathlib import Path -from typing import Annotated, Dict, Iterable, List, Optional, Type +from typing import Annotated, Dict, List, Optional, Type import rich.table import typer from docling_core.types.doc import ImageRefMode from docling_core.utils.file import resolve_source_to_path from pydantic import TypeAdapter +from rich.console import Console from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend @@ -53,7 +55,6 @@ warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic| warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr") _log = logging.getLogger(__name__) -from rich.console import Console console = Console() err_console = Console(stderr=True) @@ -160,7 +161,6 @@ def export_documents( export_doctags: bool, image_export_mode: ImageRefMode, ): - success_count = 0 failure_count = 0 @@ -233,7 +233,7 @@ def _split_list(raw: Optional[str]) -> Optional[List[str]]: @app.command(no_args_is_help=True) -def convert( +def convert( # noqa: C901 input_sources: Annotated[ List[str], typer.Argument( @@ -289,7 +289,7 @@ def convert( ..., help=( f"The OCR engine to use. When --allow-external-plugins is *not* set, the available values are: " - f"{', '.join((o.value for o in ocr_engines_enum_internal))}. " + f"{', '.join(o.value for o in ocr_engines_enum_internal)}. " f"Use the option --show-external-plugins to see the options allowed with external plugins." ), ), @@ -430,7 +430,7 @@ def convert( settings.debug.visualize_ocr = debug_visualize_ocr if from_formats is None: - from_formats = [e for e in InputFormat] + from_formats = list(InputFormat) parsed_headers: Optional[Dict[str, str]] = None if headers is not None: diff --git a/docling/cli/models.py b/docling/cli/models.py index 7bc313c..982bbdd 100644 --- a/docling/cli/models.py +++ b/docling/cli/models.py @@ -62,7 +62,7 @@ def download( models: Annotated[ Optional[list[_AvailableModels]], typer.Argument( - help=f"Models to download (default behavior: a predefined set of models will be downloaded).", + help="Models to download (default behavior: a predefined set of models will be downloaded).", ), ] = None, all: Annotated[ @@ -89,14 +89,13 @@ def download( "Cannot simultaneously set 'all' parameter and specify models to download." 
) if not quiet: - FORMAT = "%(message)s" logging.basicConfig( level=logging.INFO, format="[blue]%(message)s[/blue]", datefmt="[%X]", handlers=[RichHandler(show_level=False, show_time=False, markup=True)], ) - to_download = models or ([m for m in _AvailableModels] if all else _default_models) + to_download = models or (list(_AvailableModels) if all else _default_models) output_dir = download_models( output_dir=output_dir, force=force, diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 8ee53d6..95dcfe7 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -10,7 +10,9 @@ from docling_core.types.doc import ( TableCell, ) from docling_core.types.doc.page import SegmentedPdfPage, TextCell -from docling_core.types.io import ( # DO ΝΟΤ REMOVE; explicitly exposed from this location + +# DO NOT REMOVE; explicitly exposed from this location +from docling_core.types.io import ( DocumentStream, ) from PIL.Image import Image @@ -233,9 +235,9 @@ class Page(BaseModel): None # Internal PDF backend. By default it is cleared during assembling. ) _default_image_scale: float = 1.0 # Default image scale for external usage. - _image_cache: Dict[float, Image] = ( - {} - ) # Cache of images in different scales. By default it is cleared during assembling. + _image_cache: Dict[ + float, Image + ] = {} # Cache of images in different scales. By default it is cleared during assembling. def get_image( self, scale: float = 1.0, cropbox: Optional[BoundingBox] = None @@ -243,7 +245,7 @@ class Page(BaseModel): if self._backend is None: return self._image_cache.get(scale, None) - if not scale in self._image_cache: + if scale not in self._image_cache: if cropbox is None: self._image_cache[scale] = self._backend.get_page_image(scale=scale) else: diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 93dfd1a..668e824 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -1,13 +1,13 @@ import csv import logging import re +from collections.abc import Iterable from enum import Enum from io import BytesIO from pathlib import Path, PurePath from typing import ( TYPE_CHECKING, Dict, - Iterable, List, Literal, Optional, @@ -17,6 +17,8 @@ from typing import ( ) import filetype + +# DO NOT REMOVE; explicitly exposed from this location from docling_core.types.doc import ( DocItem, DocItemLabel, @@ -35,14 +37,14 @@ from docling_core.types.legacy_doc.base import ( PageReference, Prov, Ref, + Table as DsSchemaTable, + TableCell, ) -from docling_core.types.legacy_doc.base import Table as DsSchemaTable -from docling_core.types.legacy_doc.base import TableCell from docling_core.types.legacy_doc.document import ( CCSDocumentDescription as DsDocumentDescription, + CCSFileInfoObject as DsFileInfoObject, + ExportedCCSDocument as DsDocument, ) -from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject -from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument from docling_core.utils.file import resolve_source_to_stream from docling_core.utils.legacy import docling_document_to_legacy from pydantic import BaseModel @@ -65,7 +67,7 @@ from docling.datamodel.base_models import ( ) from docling.datamodel.settings import DocumentLimits from docling.utils.profiling import ProfilingItem -from docling.utils.utils import create_file_hash, create_hash +from docling.utils.utils import create_file_hash if TYPE_CHECKING: from docling.document_converter import FormatOption @@ -134,9 
+136,9 @@ class InputDocument(BaseModel): self._init_doc(backend, path_or_stream) elif isinstance(path_or_stream, BytesIO): - assert ( - filename is not None - ), "Can't construct InputDocument from stream without providing filename arg." + assert filename is not None, ( + "Can't construct InputDocument from stream without providing filename arg." + ) self.file = PurePath(filename) self.filesize = path_or_stream.getbuffer().nbytes @@ -228,7 +230,6 @@ class _DummyBackend(AbstractDocumentBackend): class _DocumentConversionInput(BaseModel): - path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]] headers: Optional[Dict[str, str]] = None limits: Optional[DocumentLimits] = DocumentLimits() diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 8e99cd0..a24df89 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -380,7 +380,6 @@ class PaginatedPipelineOptions(PipelineOptions): class VlmPipelineOptions(PaginatedPipelineOptions): - generate_page_images: bool = True force_backend_text: bool = ( False # (To be used with vlms, or other generative models) diff --git a/docling/document_converter.py b/docling/document_converter.py index 7489f49..125681f 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -1,11 +1,11 @@ import hashlib import logging -import math import sys import time +from collections.abc import Iterable, Iterator from functools import partial from pathlib import Path -from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Type, Union +from typing import Dict, List, Optional, Tuple, Type, Union from pydantic import BaseModel, ConfigDict, model_validator, validate_call @@ -172,7 +172,7 @@ class DocumentConverter: format_options: Optional[Dict[InputFormat, FormatOption]] = None, ): self.allowed_formats = ( - allowed_formats if allowed_formats is not None else [e for e in InputFormat] + allowed_formats if allowed_formats is not None else list(InputFormat) ) self.format_to_options = { format: ( @@ -254,7 +254,7 @@ class DocumentConverter: if not had_result and raises_on_error: raise ConversionError( - f"Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats." + "Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats." 
) def _convert( @@ -266,7 +266,7 @@ class DocumentConverter: conv_input.docs(self.format_to_options), settings.perf.doc_batch_size, # pass format_options ): - _log.info(f"Going to convert document batch...") + _log.info("Going to convert document batch...") # parallel processing only within input_batch # with ThreadPoolExecutor( diff --git a/docling/models/api_vlm_model.py b/docling/models/api_vlm_model.py index 9520122..f7e82b5 100644 --- a/docling/models/api_vlm_model.py +++ b/docling/models/api_vlm_model.py @@ -1,4 +1,4 @@ -from typing import Iterable +from collections.abc import Iterable from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.document import ConversionResult @@ -10,7 +10,6 @@ from docling.utils.profiling import TimeRecorder class ApiVlmModel(BasePageModel): - def __init__( self, enabled: bool, diff --git a/docling/models/base_model.py b/docling/models/base_model.py index 712d329..04df812 100644 --- a/docling/models/base_model.py +++ b/docling/models/base_model.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod -from typing import Any, Generic, Iterable, Optional, Protocol, Type +from collections.abc import Iterable +from typing import Generic, Optional, Protocol, Type from docling_core.types.doc import BoundingBox, DocItem, DoclingDocument, NodeItem from typing_extensions import TypeVar @@ -29,7 +30,6 @@ EnrichElementT = TypeVar("EnrichElementT", default=NodeItem) class GenericEnrichmentModel(ABC, Generic[EnrichElementT]): - elements_batch_size: int = settings.perf.elements_batch_size @abstractmethod @@ -50,7 +50,6 @@ class GenericEnrichmentModel(ABC, Generic[EnrichElementT]): class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]): - def prepare_element( self, conv_res: ConversionResult, element: NodeItem ) -> Optional[NodeItem]: @@ -62,7 +61,6 @@ class BaseEnrichmentModel(GenericEnrichmentModel[NodeItem]): class BaseItemAndImageEnrichmentModel( GenericEnrichmentModel[ItemAndImageEnrichmentElement] ): - images_scale: float expansion_factor: float = 0.0 diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index c823580..9f05aed 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -1,12 +1,12 @@ import copy import logging from abc import abstractmethod +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Optional, Type +from typing import List, Optional, Type import numpy as np from docling_core.types.doc import BoundingBox, CoordOrigin -from docling_core.types.doc.page import BoundingRectangle, PdfTextCell, TextCell from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import binary_dilation, find_objects, label diff --git a/docling/models/code_formula_model.py b/docling/models/code_formula_model.py index 10426c2..bf747c5 100644 --- a/docling/models/code_formula_model.py +++ b/docling/models/code_formula_model.py @@ -1,7 +1,8 @@ import re from collections import Counter +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Tuple, Union import numpy as np from docling_core.types.doc import ( diff --git a/docling/models/document_picture_classifier.py b/docling/models/document_picture_classifier.py index f51d735..6a57a74 100644 --- a/docling/models/document_picture_classifier.py +++ b/docling/models/document_picture_classifier.py @@ -1,5 +1,6 @@ +from collections.abc import Iterable from 
pathlib import Path -from typing import Iterable, List, Literal, Optional, Tuple, Union +from typing import List, Literal, Optional, Union import numpy as np from docling_core.types.doc import ( diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index 13eb33c..b40ca50 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -1,8 +1,9 @@ import logging import warnings import zipfile +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Optional, Type +from typing import List, Optional, Type import numpy from docling_core.types.doc import BoundingBox, CoordOrigin @@ -58,12 +59,10 @@ class EasyOcrModel(BaseOcrModel): device = decide_device(accelerator_options.device) # Enable easyocr GPU if running on CUDA, MPS use_gpu = any( - [ - device.startswith(x) - for x in [ - AcceleratorDevice.CUDA.value, - AcceleratorDevice.MPS.value, - ] + device.startswith(x) + for x in [ + AcceleratorDevice.CUDA.value, + AcceleratorDevice.MPS.value, ] ) else: @@ -98,8 +97,10 @@ class EasyOcrModel(BaseOcrModel): progress: bool = False, ) -> Path: # Models are located in https://github.com/JaidedAI/EasyOCR/blob/master/easyocr/config.py - from easyocr.config import detection_models as det_models_dict - from easyocr.config import recognition_models as rec_models_dict + from easyocr.config import ( + detection_models as det_models_dict, + recognition_models as rec_models_dict, + ) if local_dir is None: local_dir = settings.cache_dir / "models" / EasyOcrModel._model_repo_folder @@ -126,13 +127,11 @@ class EasyOcrModel(BaseOcrModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - if not self.enabled: yield from page_batch return for page in page_batch: - assert page._backend is not None if not page._backend.is_valid(): yield page diff --git a/docling/models/factories/__init__.py b/docling/models/factories/__init__.py index 9a3308e..a6adb3f 100644 --- a/docling/models/factories/__init__.py +++ b/docling/models/factories/__init__.py @@ -9,7 +9,7 @@ from docling.models.factories.picture_description_factory import ( logger = logging.getLogger(__name__) -@lru_cache() +@lru_cache def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory: factory = OcrFactory() factory.load_from_plugins(allow_external_plugins=allow_external_plugins) @@ -17,7 +17,7 @@ def get_ocr_factory(allow_external_plugins: bool = False) -> OcrFactory: return factory -@lru_cache() +@lru_cache def get_picture_description_factory( allow_external_plugins: bool = False, ) -> PictureDescriptionFactory: diff --git a/docling/models/factories/base_factory.py b/docling/models/factories/base_factory.py index 542fc7e..208f0ca 100644 --- a/docling/models/factories/base_factory.py +++ b/docling/models/factories/base_factory.py @@ -33,7 +33,7 @@ class BaseFactory(Generic[A], metaclass=ABCMeta): @property def registered_kind(self) -> list[str]: - return list(opt.kind for opt in self._classes.keys()) + return [opt.kind for opt in self._classes.keys()] def get_enum(self) -> enum.Enum: return enum.Enum( diff --git a/docling/models/hf_mlx_model.py b/docling/models/hf_mlx_model.py index 762a655..63f8fc9 100644 --- a/docling/models/hf_mlx_model.py +++ b/docling/models/hf_mlx_model.py @@ -1,25 +1,22 @@ import logging import time +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Optional +from typing import Optional from docling.datamodel.base_models import Page, 
VlmPrediction from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, AcceleratorOptions, HuggingFaceVlmOptions, ) -from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel -from docling.utils.accelerator_utils import decide_device from docling.utils.profiling import TimeRecorder _log = logging.getLogger(__name__) class HuggingFaceMlxModel(BasePageModel): - def __init__( self, enabled: bool, @@ -32,7 +29,6 @@ class HuggingFaceMlxModel(BasePageModel): self.vlm_options = vlm_options if self.enabled: - try: from mlx_vlm import generate, load # type: ignore from mlx_vlm.prompt_utils import apply_chat_template # type: ignore @@ -125,6 +121,8 @@ class HuggingFaceMlxModel(BasePageModel): generation_time = time.time() - start_time page_tags = output + _log.debug(f"Generation time {generation_time:.2f} seconds.") + # inference_time = time.time() - start_time # tokens_per_second = num_tokens / generation_time # print("") diff --git a/docling/models/hf_vlm_model.py b/docling/models/hf_vlm_model.py index 2acbe29..29276fc 100644 --- a/docling/models/hf_vlm_model.py +++ b/docling/models/hf_vlm_model.py @@ -1,16 +1,15 @@ import logging import time +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, List, Optional +from typing import Optional from docling.datamodel.base_models import Page, VlmPrediction from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import ( - AcceleratorDevice, AcceleratorOptions, HuggingFaceVlmOptions, ) -from docling.datamodel.settings import settings from docling.models.base_model import BasePageModel from docling.utils.accelerator_utils import decide_device from docling.utils.profiling import TimeRecorder @@ -19,7 +18,6 @@ _log = logging.getLogger(__name__) class HuggingFaceVlmModel(BasePageModel): - def __init__( self, enabled: bool, @@ -42,7 +40,7 @@ class HuggingFaceVlmModel(BasePageModel): device = decide_device(accelerator_options.device) self.device = device - _log.debug("Available device for HuggingFace VLM: {}".format(device)) + _log.debug(f"Available device for HuggingFace VLM: {device}") repo_cache_folder = vlm_options.repo_id.replace("/", "--") @@ -168,6 +166,10 @@ class HuggingFaceVlmModel(BasePageModel): num_tokens = len(generated_ids[0]) page_tags = generated_texts + _log.debug( + f"Generated {num_tokens} tokens in time {generation_time:.2f} seconds." 
+ ) + # inference_time = time.time() - start_time # tokens_per_second = num_tokens / generation_time # print("") diff --git a/docling/models/layout_model.py b/docling/models/layout_model.py index b3cbd95..ae37301 100644 --- a/docling/models/layout_model.py +++ b/docling/models/layout_model.py @@ -1,8 +1,9 @@ import copy import logging import warnings +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Union +from typing import Optional from docling_core.types.doc import DocItemLabel from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor @@ -142,7 +143,6 @@ class LayoutModel(BasePageModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - for page in page_batch: assert page._backend is not None if not page._backend.is_valid(): diff --git a/docling/models/ocr_mac_model.py b/docling/models/ocr_mac_model.py index 98ca3f1..a8ff55b 100644 --- a/docling/models/ocr_mac_model.py +++ b/docling/models/ocr_mac_model.py @@ -1,8 +1,9 @@ import logging import sys import tempfile +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Tuple, Type +from typing import Optional, Type from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle, TextCell @@ -41,7 +42,7 @@ class OcrMacModel(BaseOcrModel): if self.enabled: if "darwin" != sys.platform: - raise RuntimeError(f"OcrMac is only supported on Mac.") + raise RuntimeError("OcrMac is only supported on Mac.") install_errmsg = ( "ocrmac is not correctly installed. " "Please install it via `pip install ocrmac` to use this OCR engine. " @@ -58,7 +59,6 @@ class OcrMacModel(BaseOcrModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - if not self.enabled: yield from page_batch return @@ -69,7 +69,6 @@ class OcrMacModel(BaseOcrModel): yield page else: with TimeRecorder(conv_res, "ocr"): - ocr_rects = self.get_ocr_rects(page) all_ocr_cells = [] diff --git a/docling/models/page_assemble_model.py b/docling/models/page_assemble_model.py index 4712abd..7153181 100644 --- a/docling/models/page_assemble_model.py +++ b/docling/models/page_assemble_model.py @@ -1,6 +1,7 @@ import logging import re -from typing import Iterable, List +from collections.abc import Iterable +from typing import List from pydantic import BaseModel @@ -53,9 +54,9 @@ class PageAssembleModel(BasePageModel): sanitized_text = "".join(lines) # Text normalization - sanitized_text = sanitized_text.replace("⁄", "/") - sanitized_text = sanitized_text.replace("’", "'") - sanitized_text = sanitized_text.replace("‘", "'") + sanitized_text = sanitized_text.replace("⁄", "/") # noqa: RUF001 + sanitized_text = sanitized_text.replace("’", "'") # noqa: RUF001 + sanitized_text = sanitized_text.replace("‘", "'") # noqa: RUF001 sanitized_text = sanitized_text.replace("“", '"') sanitized_text = sanitized_text.replace("”", '"') sanitized_text = sanitized_text.replace("•", "·") @@ -71,7 +72,6 @@ class PageAssembleModel(BasePageModel): yield page else: with TimeRecorder(conv_res, "page_assemble"): - assert page.predictions.layout is not None # assembles some JSON output page by page. 
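Reviewer note on the sanitization block above: the added # noqa: RUF001 markers tell Ruff that the fraction slash and the curly quotes are visually ambiguous Unicode characters being replaced on purpose, not typos for their ASCII look-alikes. If this replace() chain keeps growing, a single-pass str.translate table would be an equivalent alternative; a sketch under the assumption that only one-to-one character mappings are needed, with illustrative names that are not part of this patch:

    # Sketch: one-pass equivalent of the .replace() chain in PageAssembleModel.
    # Escape sequences keep the mapping itself unambiguous to linters.
    NORMALIZATION_TABLE = str.maketrans({
        "\u2044": "/",        # fraction slash
        "\u2019": "'",        # right single quotation mark
        "\u2018": "'",        # left single quotation mark
        "\u201c": '"',        # left double quotation mark
        "\u201d": '"',        # right double quotation mark
        "\u2022": "\u00b7",   # bullet -> middle dot
    })

    def sanitize(text: str) -> str:
        return text.translate(NORMALIZATION_TABLE)

    print(sanitize("\u2018quoted\u2019 \u2022 1\u20442"))  # prints: 'quoted' · 1/2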
@@ -83,7 +83,6 @@ class PageAssembleModel(BasePageModel): for cluster in page.predictions.layout.clusters: # _log.info("Cluster label seen:", cluster.label) if cluster.label in LayoutModel.TEXT_ELEM_LABELS: - textlines = [ cell.text.replace("\x02", "-").strip() for cell in cluster.cells @@ -109,9 +108,7 @@ class PageAssembleModel(BasePageModel): tbl = page.predictions.tablestructure.table_map.get( cluster.id, None ) - if ( - not tbl - ): # fallback: add table without structure, if it isn't present + if not tbl: # fallback: add table without structure, if it isn't present tbl = Table( label=cluster.label, id=cluster.id, @@ -130,9 +127,7 @@ class PageAssembleModel(BasePageModel): fig = page.predictions.figures_classification.figure_map.get( cluster.id, None ) - if ( - not fig - ): # fallback: add figure without classification, if it isn't present + if not fig: # fallback: add figure without classification, if it isn't present fig = FigureElement( label=cluster.label, id=cluster.id, diff --git a/docling/models/page_preprocessing_model.py b/docling/models/page_preprocessing_model.py index d1b29e3..b45b189 100644 --- a/docling/models/page_preprocessing_model.py +++ b/docling/models/page_preprocessing_model.py @@ -1,5 +1,6 @@ +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional +from typing import Optional from PIL import ImageDraw from pydantic import BaseModel diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py index 1aa7351..44bb5e2 100644 --- a/docling/models/picture_description_api_model.py +++ b/docling/models/picture_description_api_model.py @@ -1,5 +1,6 @@ +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Type, Union +from typing import Optional, Type, Union from PIL import Image diff --git a/docling/models/picture_description_base_model.py b/docling/models/picture_description_base_model.py index 9616922..2f6e647 100644 --- a/docling/models/picture_description_base_model.py +++ b/docling/models/picture_description_base_model.py @@ -1,12 +1,11 @@ -import logging from abc import abstractmethod +from collections.abc import Iterable from pathlib import Path -from typing import Any, Iterable, List, Optional, Type, Union +from typing import List, Optional, Type, Union from docling_core.types.doc import ( DoclingDocument, NodeItem, - PictureClassificationClass, PictureItem, ) from docling_core.types.doc.document import ( # TODO: move import to docling_core.types.doc diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py index fc5c51e..374f575 100644 --- a/docling/models/picture_description_vlm_model.py +++ b/docling/models/picture_description_vlm_model.py @@ -1,5 +1,6 @@ +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Type, Union +from typing import Optional, Type, Union from PIL import Image @@ -13,7 +14,6 @@ from docling.utils.accelerator_utils import decide_device class PictureDescriptionVlmModel(PictureDescriptionBaseModel): - @classmethod def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]: return PictureDescriptionVlmOptions @@ -36,7 +36,6 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel): self.options: PictureDescriptionVlmOptions if self.enabled: - if artifacts_path is None: artifacts_path = self.download_models(repo_id=self.options.repo_id) else: diff --git 
a/docling/models/rapid_ocr_model.py b/docling/models/rapid_ocr_model.py index e21974d..2c7f435 100644 --- a/docling/models/rapid_ocr_model.py +++ b/docling/models/rapid_ocr_model.py @@ -1,6 +1,7 @@ import logging +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Type +from typing import Optional, Type import numpy from docling_core.types.doc import BoundingBox, CoordOrigin @@ -74,13 +75,11 @@ class RapidOcrModel(BaseOcrModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - if not self.enabled: yield from page_batch return for page in page_batch: - assert page._backend is not None if not page._backend.is_valid(): yield page diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py index e7bdd1a..4373536 100644 --- a/docling/models/readingorder_model.py +++ b/docling/models/readingorder_model.py @@ -1,12 +1,7 @@ -import copy -import random from pathlib import Path from typing import Dict, List from docling_core.types.doc import ( - BoundingBox, - CoordOrigin, - DocItem, DocItemLabel, DoclingDocument, DocumentOrigin, @@ -17,13 +12,10 @@ from docling_core.types.doc import ( TableData, ) from docling_core.types.doc.document import ContentLayer -from docling_core.types.legacy_doc.base import Ref -from docling_core.types.legacy_doc.document import BaseText from docling_ibm_models.reading_order.reading_order_rb import ( PageElement as ReadingOrderPageElement, + ReadingOrderPredictor, ) -from docling_ibm_models.reading_order.reading_order_rb import ReadingOrderPredictor -from PIL import ImageDraw from pydantic import BaseModel, ConfigDict from docling.datamodel.base_models import ( @@ -35,7 +27,6 @@ from docling.datamodel.base_models import ( TextElement, ) from docling.datamodel.document import ConversionResult -from docling.datamodel.settings import settings from docling.utils.profiling import ProfilingScope, TimeRecorder @@ -53,12 +44,10 @@ class ReadingOrderModel: def _assembled_to_readingorder_elements( self, conv_res: ConversionResult ) -> List[ReadingOrderPageElement]: - elements: List[ReadingOrderPageElement] = [] page_no_to_pages = {p.page_no: p for p in conv_res.pages} for element in conv_res.assembled.elements: - page_height = page_no_to_pages[element.page_no].size.height # type: ignore bbox = element.cluster.bbox.to_bottom_left_origin(page_height) text = element.text or "" @@ -84,7 +73,6 @@ class ReadingOrderModel: def _add_child_elements( self, element: BasePageElement, doc_item: NodeItem, doc: DoclingDocument ): - child: Cluster for child in element.cluster.children: c_label = child.label @@ -110,7 +98,7 @@ class ReadingOrderModel: else: doc.add_text(parent=doc_item, label=c_label, text=c_text, prov=c_prov) - def _readingorder_elements_to_docling_doc( + def _readingorder_elements_to_docling_doc( # noqa: C901 self, conv_res: ConversionResult, ro_elements: List[ReadingOrderPageElement], @@ -118,7 +106,6 @@ class ReadingOrderModel: el_to_footnotes_mapping: Dict[int, List[int]], el_merges_mapping: Dict[int, List[int]], ) -> DoclingDocument: - id_to_elem = { RefItem(cref=f"#/{elem.page_no}/{elem.cluster.id}").cref: elem for elem in conv_res.assembled.elements @@ -192,7 +179,6 @@ class ReadingOrderModel: code_item.footnotes.append(new_footnote_item.get_ref()) else: - new_item, current_list = self._handle_text_element( element, out_doc, current_list, page_height ) @@ -206,7 +192,6 @@ class ReadingOrderModel: ) elif isinstance(element, Table): - tbl_data = 
TableData( num_rows=element.num_rows, num_cols=element.num_cols, @@ -342,12 +327,12 @@ class ReadingOrderModel: return new_item, current_list def _merge_elements(self, element, merged_elem, new_item, page_height): - assert isinstance( - merged_elem, type(element) - ), "Merged element must be of same type as element." - assert ( - merged_elem.label == new_item.label - ), "Labels of merged elements must match." + assert isinstance(merged_elem, type(element)), ( + "Merged element must be of same type as element." + ) + assert merged_elem.label == new_item.label, ( + "Labels of merged elements must match." + ) prov = ProvenanceItem( page_no=element.page_no + 1, charspan=( diff --git a/docling/models/table_structure_model.py b/docling/models/table_structure_model.py index 34a7d9d..44579b9 100644 --- a/docling/models/table_structure_model.py +++ b/docling/models/table_structure_model.py @@ -1,13 +1,13 @@ import copy import warnings +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Union +from typing import Optional import numpy from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell from docling_core.types.doc.page import ( BoundingRectangle, - SegmentedPdfPage, TextCellUnit, ) from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor @@ -44,7 +44,6 @@ class TableStructureModel(BasePageModel): self.enabled = enabled if self.enabled: - if artifacts_path is None: artifacts_path = self.download_models() / self._model_path else: @@ -175,7 +174,6 @@ class TableStructureModel(BasePageModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - if not self.enabled: yield from page_batch return @@ -186,7 +184,6 @@ class TableStructureModel(BasePageModel): yield page else: with TimeRecorder(conv_res, "table_structure"): - assert page.predictions.layout is not None assert page.size is not None @@ -260,7 +257,6 @@ class TableStructureModel(BasePageModel): table_out = tf_output[0] table_cells = [] for element in table_out["tf_responses"]: - if not self.do_cell_matching: the_bbox = BoundingBox.model_validate( element["bbox"] diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index 1e7fe03..91b4555 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -3,9 +3,10 @@ import io import logging import os import tempfile +from collections.abc import Iterable from pathlib import Path from subprocess import DEVNULL, PIPE, Popen -from typing import Iterable, List, Optional, Tuple, Type +from typing import List, Optional, Tuple, Type import pandas as pd from docling_core.types.doc import BoundingBox, CoordOrigin @@ -63,8 +64,7 @@ class TesseractOcrCliModel(BaseOcrModel): ) def _get_name_and_version(self) -> Tuple[str, str]: - - if self._name != None and self._version != None: + if self._name is not None and self._version is not None: return self._name, self._version # type: ignore cmd = [self.options.tesseract_cmd, "--version"] @@ -125,14 +125,16 @@ class TesseractOcrCliModel(BaseOcrModel): # _log.info(decoded_data) # Read the TSV file generated by Tesseract - df = pd.read_csv(io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t") + df_result = pd.read_csv( + io.StringIO(decoded_data), quoting=csv.QUOTE_NONE, sep="\t" + ) # Display the dataframe (optional) # _log.info("df: ", df.head()) # Filter rows that contain actual text (ignore header or empty rows) - df_filtered = df[ - 
df["text"].notnull() & (df["text"].apply(str).str.strip() != "") + df_filtered = df_result[ + df_result["text"].notna() & (df_result["text"].apply(str).str.strip() != "") ] return df_filtered @@ -149,10 +151,10 @@ class TesseractOcrCliModel(BaseOcrModel): proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL) output, _ = proc.communicate() decoded_data = output.decode("utf-8") - df = pd.read_csv( + df_detected = pd.read_csv( io.StringIO(decoded_data), sep=":", header=None, names=["key", "value"] ) - scripts = df.loc[df["key"] == "Script"].value.tolist() + scripts = df_detected.loc[df_detected["key"] == "Script"].value.tolist() if len(scripts) == 0: _log.warning("Tesseract cannot detect the script of the page") return None @@ -183,11 +185,11 @@ class TesseractOcrCliModel(BaseOcrModel): proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL) output, _ = proc.communicate() decoded_data = output.decode("utf-8") - df = pd.read_csv(io.StringIO(decoded_data), header=None) - self._tesseract_languages = df[0].tolist()[1:] + df_list = pd.read_csv(io.StringIO(decoded_data), header=None) + self._tesseract_languages = df_list[0].tolist()[1:] # Decide the script prefix - if any([l.startswith("script/") for l in self._tesseract_languages]): + if any(lang.startswith("script/") for lang in self._tesseract_languages): script_prefix = "script/" else: script_prefix = "" @@ -197,7 +199,6 @@ class TesseractOcrCliModel(BaseOcrModel): def __call__( self, conv_res: ConversionResult, page_batch: Iterable[Page] ) -> Iterable[Page]: - if not self.enabled: yield from page_batch return @@ -225,19 +226,19 @@ class TesseractOcrCliModel(BaseOcrModel): fname = image_file.name high_res_image.save(image_file) - df = self._run_tesseract(fname) + df_result = self._run_tesseract(fname) finally: if os.path.exists(fname): os.remove(fname) - # _log.info(df) + # _log.info(df_result) # Print relevant columns (bounding box and text) - for ix, row in df.iterrows(): + for ix, row in df_result.iterrows(): text = row["text"] conf = row["conf"] - l = float(row["left"]) + l = float(row["left"]) # noqa: E741 b = float(row["top"]) w = float(row["width"]) h = float(row["height"]) diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py index 84a02a3..fbe907c 100644 --- a/docling/models/tesseract_ocr_model.py +++ b/docling/models/tesseract_ocr_model.py @@ -1,6 +1,7 @@ import logging +from collections.abc import Iterable from pathlib import Path -from typing import Iterable, Optional, Type +from typing import Optional, Type from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc.page import BoundingRectangle, TextCell @@ -37,9 +38,6 @@ class TesseractOcrModel(BaseOcrModel): self.options: TesseractOcrOptions self.scale = 3 # multiplier for 72 dpi == 216 dpi. 
- self.reader = None - self.osd_reader = None - self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {} if self.enabled: install_errmsg = ( @@ -64,7 +62,7 @@ class TesseractOcrModel(BaseOcrModel): raise ImportError(install_errmsg) try: tesseract_version = tesserocr.tesseract_version() - except: + except Exception: raise ImportError(install_errmsg) _, self._tesserocr_languages = tesserocr.get_languages() @@ -75,7 +73,7 @@ class TesseractOcrModel(BaseOcrModel): _log.debug("Initializing TesserOCR: %s", tesseract_version) lang = "+".join(self.options.lang) - if any([l.startswith("script/") for l in self._tesserocr_languages]): + if any(lang.startswith("script/") for lang in self._tesserocr_languages): self.script_prefix = "script/" else: self.script_prefix = "" @@ -86,6 +84,10 @@ class TesseractOcrModel(BaseOcrModel): "oem": tesserocr.OEM.DEFAULT, } + self.reader = None + self.osd_reader = None + self.script_readers: dict[str, tesserocr.PyTessBaseAPI] = {} + if self.options.path is not None: tesserocr_kwargs["path"] = self.options.path diff --git a/docling/pipeline/base_pipeline.py b/docling/pipeline/base_pipeline.py index 1bf48ef..29475d6 100644 --- a/docling/pipeline/base_pipeline.py +++ b/docling/pipeline/base_pipeline.py @@ -3,9 +3,10 @@ import logging import time import traceback from abc import ABC, abstractmethod -from typing import Any, Callable, Iterable, List +from collections.abc import Iterable +from typing import Any, Callable, List -from docling_core.types.doc import DoclingDocument, NodeItem +from docling_core.types.doc import NodeItem from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend @@ -64,7 +65,6 @@ class BasePipeline(ABC): return conv_res def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult: - def _prepare_elements( conv_res: ConversionResult, model: GenericEnrichmentModel[Any] ) -> Iterable[NodeItem]: @@ -113,7 +113,6 @@ class BasePipeline(ABC): class PaginatedPipeline(BasePipeline): # TODO this is a bad name. - def __init__(self, pipeline_options: PipelineOptions): super().__init__(pipeline_options) self.keep_backend = False @@ -127,7 +126,6 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name. yield from page_batch def _build_document(self, conv_res: ConversionResult) -> ConversionResult: - if not isinstance(conv_res.input._backend, PdfDocumentBackend): raise RuntimeError( f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. " @@ -139,8 +137,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name. total_elapsed_time = 0.0 with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT): - - for i in range(0, conv_res.input.page_count): + for i in range(conv_res.input.page_count): start_page, end_page = conv_res.input.limits.page_range if (start_page - 1) <= i <= (end_page - 1): conv_res.pages.append(Page(page_no=i)) @@ -161,7 +158,6 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name. pipeline_pages = self._apply_on_pages(conv_res, init_pages) for p in pipeline_pages: # Must exhaust! 
- # Cleanup cached images if not self.keep_images: p._image_cache = {} diff --git a/docling/pipeline/simple_pipeline.py b/docling/pipeline/simple_pipeline.py index fb98523..2e8f0ea 100644 --- a/docling/pipeline/simple_pipeline.py +++ b/docling/pipeline/simple_pipeline.py @@ -24,7 +24,6 @@ class SimplePipeline(BasePipeline): super().__init__(pipeline_options) def _build_document(self, conv_res: ConversionResult) -> ConversionResult: - if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend): raise RuntimeError( f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. " diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index ae2d918..fe93c6c 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -1,5 +1,4 @@ import logging -import sys import warnings from pathlib import Path from typing import Optional, cast diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index 79279fd..9a7b51e 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -1,5 +1,4 @@ import logging -import warnings from io import BytesIO from pathlib import Path from typing import List, Optional, Union, cast @@ -32,7 +31,6 @@ _log = logging.getLogger(__name__) class VlmPipeline(PaginatedPipeline): - def __init__(self, pipeline_options: VlmPipelineOptions): super().__init__(pipeline_options) self.keep_backend = True @@ -114,7 +112,6 @@ class VlmPipeline(PaginatedPipeline): def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult: with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT): - if ( self.pipeline_options.vlm_options.response_format == ResponseFormat.DOCTAGS diff --git a/docling/utils/export.py b/docling/utils/export.py index 2c0077e..debf09f 100644 --- a/docling/utils/export.py +++ b/docling/utils/export.py @@ -1,8 +1,8 @@ import logging -from typing import Any, Dict, Iterable, List, Tuple, Union +from collections.abc import Iterable +from typing import Any, Dict, List, Tuple, Union from docling_core.types.doc import BoundingBox, CoordOrigin -from docling_core.types.doc.page import TextCell from docling_core.types.legacy_doc.base import BaseCell, BaseText, Ref, Table from docling.datamodel.document import ConversionResult, Page @@ -13,7 +13,6 @@ _log = logging.getLogger(__name__) def generate_multimodal_pages( doc_result: ConversionResult, ) -> Iterable[Tuple[str, str, List[Dict[str, Any]], List[Dict[str, Any]], Page]]: - label_to_doclaynet = { "title": "title", "table-of-contents": "document_index", @@ -122,7 +121,6 @@ def generate_multimodal_pages( if doc.main_text is None: return for ix, orig_item in enumerate(doc.main_text): - item = doc._resolve_ref(orig_item) if isinstance(orig_item, Ref) else orig_item if item is None or item.prov is None or len(item.prov) == 0: _log.debug(f"Skipping item {orig_item}") diff --git a/docling/utils/glm_utils.py b/docling/utils/glm_utils.py index c3c4353..b67281f 100644 --- a/docling/utils/glm_utils.py +++ b/docling/utils/glm_utils.py @@ -29,7 +29,7 @@ def resolve_item(paths, obj): try: key = int(paths[0]) - except: + except Exception: key = paths[0] if len(paths) == 1: @@ -67,7 +67,7 @@ def _flatten_table_grid(grid: List[List[dict]]) -> List[dict]: return unique_objects -def to_docling_document(doc_glm, update_name_label=False) -> DoclingDocument: +def to_docling_document(doc_glm, update_name_label=False) -> 
DoclingDocument: # noqa: C901 origin = DocumentOrigin( mimetype="application/pdf", filename=doc_glm["file-info"]["filename"], diff --git a/docling/utils/layout_postprocessor.py b/docling/utils/layout_postprocessor.py index 17d8f8b..4c25655 100644 --- a/docling/utils/layout_postprocessor.py +++ b/docling/utils/layout_postprocessor.py @@ -18,7 +18,7 @@ class UnionFind: def __init__(self, elements): self.parent = {elem: elem for elem in elements} - self.rank = {elem: 0 for elem in elements} + self.rank = dict.fromkeys(elements, 0) def find(self, x): if self.parent[x] != x: @@ -484,7 +484,9 @@ class LayoutPostprocessor: spatial_index = ( self.regular_index if cluster_type == "regular" - else self.picture_index if cluster_type == "picture" else self.wrapper_index + else self.picture_index + if cluster_type == "picture" + else self.wrapper_index ) # Map of currently valid clusters diff --git a/docling/utils/model_downloader.py b/docling/utils/model_downloader.py index 694fe04..6a1eb83 100644 --- a/docling/utils/model_downloader.py +++ b/docling/utils/model_downloader.py @@ -37,7 +37,7 @@ def download_models( output_dir.mkdir(exist_ok=True, parents=True) if with_layout: - _log.info(f"Downloading layout model...") + _log.info("Downloading layout model...") LayoutModel.download_models( local_dir=output_dir / LayoutModel._model_repo_folder, force=force, @@ -45,7 +45,7 @@ def download_models( ) if with_tableformer: - _log.info(f"Downloading tableformer model...") + _log.info("Downloading tableformer model...") TableStructureModel.download_models( local_dir=output_dir / TableStructureModel._model_repo_folder, force=force, @@ -53,7 +53,7 @@ def download_models( ) if with_picture_classifier: - _log.info(f"Downloading picture classifier model...") + _log.info("Downloading picture classifier model...") DocumentPictureClassifier.download_models( local_dir=output_dir / DocumentPictureClassifier._model_repo_folder, force=force, @@ -61,7 +61,7 @@ def download_models( ) if with_code_formula: - _log.info(f"Downloading code formula model...") + _log.info("Downloading code formula model...") CodeFormulaModel.download_models( local_dir=output_dir / CodeFormulaModel._model_repo_folder, force=force, @@ -69,7 +69,7 @@ def download_models( ) if with_smolvlm: - _log.info(f"Downloading SmolVlm model...") + _log.info("Downloading SmolVlm model...") PictureDescriptionVlmModel.download_models( repo_id=smolvlm_picture_description.repo_id, local_dir=output_dir / smolvlm_picture_description.repo_cache_folder, @@ -78,7 +78,7 @@ def download_models( ) if with_granite_vision: - _log.info(f"Downloading Granite Vision model...") + _log.info("Downloading Granite Vision model...") PictureDescriptionVlmModel.download_models( repo_id=granite_picture_description.repo_id, local_dir=output_dir / granite_picture_description.repo_cache_folder, @@ -87,7 +87,7 @@ def download_models( ) if with_easyocr: - _log.info(f"Downloading easyocr models...") + _log.info("Downloading easyocr models...") EasyOcrModel.download_models( local_dir=output_dir / EasyOcrModel._model_repo_folder, force=force, diff --git a/docling/utils/utils.py b/docling/utils/utils.py index 1261f86..11b9fdd 100644 --- a/docling/utils/utils.py +++ b/docling/utils/utils.py @@ -13,7 +13,7 @@ def chunkify(iterator, chunk_size): if isinstance(iterator, List): iterator = iter(iterator) for first in iterator: # Take the first element from the iterator - yield [first] + list(islice(iterator, chunk_size - 1)) + yield [first, *list(islice(iterator, chunk_size - 1))] def 
create_file_hash(path_or_stream: Union[BytesIO, Path]) -> str: diff --git a/docs/examples/backend_xml_rag.ipynb b/docs/examples/backend_xml_rag.ipynb index 091f116..60872c3 100644 --- a/docs/examples/backend_xml_rag.ipynb +++ b/docs/examples/backend_xml_rag.ipynb @@ -383,7 +383,7 @@ "\n", "print(f\"Downloading {url}...\")\n", "buf = BytesIO(requests.get(url).content)\n", - "print(f\"Parsing zip file, splitting into XML sections, and exporting to files...\")\n", + "print(\"Parsing zip file, splitting into XML sections, and exporting to files...\")\n", "with zipfile.ZipFile(buf) as zf:\n", " res = zf.testzip()\n", " if res:\n", @@ -544,7 +544,7 @@ "source": [ "doc = backend.convert()\n", "\n", - "claims_sec = [item for item in doc.texts if item.text == \"CLAIMS\"][0]\n", + "claims_sec = next(item for item in doc.texts if item.text == \"CLAIMS\")\n", "print(f'Patent \"{doc.texts[0].text}\" has {len(claims_sec.children)} claims')" ] }, diff --git a/docs/examples/batch_convert.py b/docs/examples/batch_convert.py index fd68e62..25eb2ba 100644 --- a/docs/examples/batch_convert.py +++ b/docs/examples/batch_convert.py @@ -1,8 +1,8 @@ import json import logging import time +from collections.abc import Iterable from pathlib import Path -from typing import Iterable import yaml from docling_core.types.doc import ImageRefMode @@ -11,7 +11,6 @@ from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBacke from docling.datamodel.base_models import ConversionStatus, InputFormat from docling.datamodel.document import ConversionResult from docling.datamodel.pipeline_options import PdfPipelineOptions -from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter, PdfFormatOption _log = logging.getLogger(__name__) diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py index ddc1921..3b8ae6d 100644 --- a/docs/examples/custom_convert.py +++ b/docs/examples/custom_convert.py @@ -3,7 +3,6 @@ import logging import time from pathlib import Path -from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( AcceleratorDevice, @@ -11,9 +10,6 @@ from docling.datamodel.pipeline_options import ( PdfPipelineOptions, ) from docling.document_converter import DocumentConverter, PdfFormatOption -from docling.models.ocr_mac_model import OcrMacOptions -from docling.models.tesseract_ocr_cli_model import TesseractCliOcrOptions -from docling.models.tesseract_ocr_model import TesseractOcrOptions _log = logging.getLogger(__name__) diff --git a/docs/examples/develop_formula_understanding.py b/docs/examples/develop_formula_understanding.py index ca24d95..beb1575 100644 --- a/docs/examples/develop_formula_understanding.py +++ b/docs/examples/develop_formula_understanding.py @@ -3,8 +3,8 @@ # It does not run the actual formula understanding model. import logging +from collections.abc import Iterable from pathlib import Path -from typing import Iterable from docling_core.types.doc import DocItemLabel, DoclingDocument, NodeItem, TextItem @@ -49,7 +49,6 @@ class ExampleFormulaUnderstandingEnrichmentModel(BaseItemAndImageEnrichmentModel # How the pipeline can be extended. 
class ExampleFormulaUnderstandingPipeline(StandardPdfPipeline): - def __init__(self, pipeline_options: ExampleFormulaUnderstandingPipelineOptions): super().__init__(pipeline_options) self.pipeline_options: ExampleFormulaUnderstandingPipelineOptions @@ -85,7 +84,7 @@ def main(): ) } ) - result = doc_converter.convert(input_doc_path) + doc_converter.convert(input_doc_path) if __name__ == "__main__": diff --git a/docs/examples/develop_picture_enrichment.py b/docs/examples/develop_picture_enrichment.py index 9991afe..9e3d306 100644 --- a/docs/examples/develop_picture_enrichment.py +++ b/docs/examples/develop_picture_enrichment.py @@ -3,8 +3,9 @@ # It does not run the actual picture classifier model. import logging +from collections.abc import Iterable from pathlib import Path -from typing import Any, Iterable +from typing import Any from docling_core.types.doc import ( DoclingDocument, diff --git a/docs/examples/export_figures.py b/docs/examples/export_figures.py index c218666..8ed14a7 100644 --- a/docs/examples/export_figures.py +++ b/docs/examples/export_figures.py @@ -4,7 +4,7 @@ from pathlib import Path from docling_core.types.doc import ImageRefMode, PictureItem, TableItem -from docling.datamodel.base_models import FigureElement, InputFormat, Table +from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/docs/examples/export_multimodal.py b/docs/examples/export_multimodal.py index e7ea3df..bef74bf 100644 --- a/docs/examples/export_multimodal.py +++ b/docs/examples/export_multimodal.py @@ -51,7 +51,6 @@ def main(): page_segments, page, ) in generate_multimodal_pages(conv_res): - dpi = page._default_image_scale * 72 rows.append( @@ -81,10 +80,10 @@ def main(): ) # Generate one parquet from all documents - df = pd.json_normalize(rows) + df_result = pd.json_normalize(rows) now = datetime.datetime.now() output_filename = output_dir / f"multimodal_{now:%Y-%m-%d_%H%M%S}.parquet" - df.to_parquet(output_filename) + df_result.to_parquet(output_filename) end_time = time.time() - start_time diff --git a/docs/examples/export_tables.py b/docs/examples/export_tables.py index 8f09292..9a911d8 100644 --- a/docs/examples/export_tables.py +++ b/docs/examples/export_tables.py @@ -32,12 +32,12 @@ def main(): print(table_df.to_markdown()) # Save the table as csv - element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.csv" + element_csv_filename = output_dir / f"{doc_filename}-table-{table_ix + 1}.csv" _log.info(f"Saving CSV table to {element_csv_filename}") table_df.to_csv(element_csv_filename) # Save the table as html - element_html_filename = output_dir / f"{doc_filename}-table-{table_ix+1}.html" + element_html_filename = output_dir / f"{doc_filename}-table-{table_ix + 1}.html" _log.info(f"Saving HTML table to {element_html_filename}") with element_html_filename.open("w") as fp: fp.write(table.export_to_html(doc=conv_res.document)) diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py index 8390d5f..5525e87 100644 --- a/docs/examples/full_page_ocr.py +++ b/docs/examples/full_page_ocr.py @@ -1,14 +1,9 @@ from pathlib import Path -from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( - EasyOcrOptions, - OcrMacOptions, PdfPipelineOptions, - RapidOcrOptions, TesseractCliOcrOptions, - 
TesseractOcrOptions, ) from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/docs/examples/hybrid_chunking.ipynb b/docs/examples/hybrid_chunking.ipynb index 2f6d945..c8a8f42 100644 --- a/docs/examples/hybrid_chunking.ipynb +++ b/docs/examples/hybrid_chunking.ipynb @@ -153,10 +153,10 @@ "source": [ "for i, chunk in enumerate(chunk_iter):\n", " print(f\"=== {i} ===\")\n", - " print(f\"chunk.text:\\n{repr(f'{chunk.text[:300]}…')}\")\n", + " print(f\"chunk.text:\\n{f'{chunk.text[:300]}…'!r}\")\n", "\n", " enriched_text = chunker.serialize(chunk=chunk)\n", - " print(f\"chunker.serialize(chunk):\\n{repr(f'{enriched_text[:300]}…')}\")\n", + " print(f\"chunker.serialize(chunk):\\n{f'{enriched_text[:300]}…'!r}\")\n", "\n", " print()" ] @@ -353,11 +353,11 @@ "for i, chunk in enumerate(chunks):\n", " print(f\"=== {i} ===\")\n", " txt_tokens = len(tokenizer.tokenize(chunk.text))\n", - " print(f\"chunk.text ({txt_tokens} tokens):\\n{repr(chunk.text)}\")\n", + " print(f\"chunk.text ({txt_tokens} tokens):\\n{chunk.text!r}\")\n", "\n", " ser_txt = chunker.serialize(chunk=chunk)\n", " ser_tokens = len(tokenizer.tokenize(ser_txt))\n", - " print(f\"chunker.serialize(chunk) ({ser_tokens} tokens):\\n{repr(ser_txt)}\")\n", + " print(f\"chunker.serialize(chunk) ({ser_tokens} tokens):\\n{ser_txt!r}\")\n", "\n", " print()" ] diff --git a/docs/examples/minimal_vlm_pipeline.py b/docs/examples/minimal_vlm_pipeline.py index 6a15fe4..fab6342 100644 --- a/docs/examples/minimal_vlm_pipeline.py +++ b/docs/examples/minimal_vlm_pipeline.py @@ -2,17 +2,14 @@ import json import time from pathlib import Path -import yaml +from docling_core.types.doc import DocItemLabel, ImageRefMode +from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( - AcceleratorDevice, VlmPipelineOptions, - granite_vision_vlm_conversion_options, - smoldocling_vlm_conversion_options, smoldocling_vlm_mlx_conversion_options, ) -from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter, PdfFormatOption from docling.pipeline.vlm_pipeline import VlmPipeline @@ -39,9 +36,6 @@ pipeline_options.vlm_options = smoldocling_vlm_mlx_conversion_options ## Alternative VLM models: # pipeline_options.vlm_options = granite_vision_vlm_conversion_options -from docling_core.types.doc import DocItemLabel, ImageRefMode -from docling_core.types.doc.document import DEFAULT_EXPORT_LABELS - ## Set up pipeline for PDF or image inputs converter = DocumentConverter( format_options={ @@ -62,7 +56,7 @@ out_path.mkdir(parents=True, exist_ok=True) for source in sources: start_time = time.time() print("================================================") - print("Processing... {}".format(source)) + print(f"Processing... 
{source}") print("================================================") print("") @@ -77,7 +71,7 @@ for source in sources: print(page.predictions.vlm_response.text) res.document.save_as_html( - filename=Path("{}/{}.html".format(out_path, res.input.file.stem)), + filename=Path(f"{out_path}/{res.input.file.stem}.html"), image_mode=ImageRefMode.REFERENCED, labels=[*DEFAULT_EXPORT_LABELS, DocItemLabel.FOOTNOTE], ) diff --git a/docs/examples/pictures_description.ipynb b/docs/examples/pictures_description.ipynb index feeb00b..a40a73a 100644 --- a/docs/examples/pictures_description.ipynb +++ b/docs/examples/pictures_description.ipynb @@ -144,7 +144,7 @@ "for pic in doc.pictures[:5]:\n", " html_item = (\n", " f\"

Picture {pic.self_ref}

\"\n", - " f'
'\n", + " f'
'\n", " f\"

Caption

{pic.caption_text(doc=doc)}
\"\n", " )\n", " for annotation in pic.annotations:\n", @@ -252,7 +252,7 @@ "for pic in doc.pictures[:5]:\n", " html_item = (\n", " f\"

Picture {pic.self_ref}

\"\n", - " f'
'\n", + " f'
'\n", " f\"

Caption

{pic.caption_text(doc=doc)}
\"\n", " )\n", " for annotation in pic.annotations:\n", diff --git a/docs/examples/rag_azuresearch.ipynb b/docs/examples/rag_azuresearch.ipynb index 9f867b1..b206069 100644 --- a/docs/examples/rag_azuresearch.ipynb +++ b/docs/examples/rag_azuresearch.ipynb @@ -283,7 +283,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -369,7 +369,7 @@ " new_index = SearchIndex(name=index_name, fields=fields, vector_search=vector_search)\n", " try:\n", " index_client.delete_index(index_name)\n", - " except:\n", + " except Exception:\n", " pass\n", "\n", " index_client.create_or_update_index(new_index)\n", @@ -487,7 +487,7 @@ "\n", " all_succeeded = all(r.succeeded for r in resp)\n", " console.print(\n", - " f\"Uploaded batch {i} -> {i+len(subset)}; all_succeeded: {all_succeeded}, \"\n", + " f\"Uploaded batch {i} -> {i + len(subset)}; all_succeeded: {all_succeeded}, \"\n", " f\"first_doc_status_code: {resp[0].status_code}\"\n", " )\n", "\n", @@ -807,10 +807,12 @@ } ], "source": [ + "from typing import Optional\n", + "\n", "from azure.search.documents.models import VectorizableTextQuery\n", "\n", "\n", - "def generate_chat_response(prompt: str, system_message: str = None):\n", + "def generate_chat_response(prompt: str, system_message: Optional[str] = None):\n", " \"\"\"\n", " Generates a single-turn chat response using Azure OpenAI Chat.\n", " If you need multi-turn conversation or follow-up queries, you'll have to\n", diff --git a/docs/examples/rag_haystack.ipynb b/docs/examples/rag_haystack.ipynb index b954115..2861c96 100644 --- a/docs/examples/rag_haystack.ipynb +++ b/docs/examples/rag_haystack.ipynb @@ -351,7 +351,7 @@ "for source in sources:\n", " if EXPORT_TYPE == ExportType.DOC_CHUNKS:\n", " doc_chunk = DocChunk.model_validate(source.meta[\"dl_meta\"])\n", - " print(f\"- text: {repr(doc_chunk.text)}\")\n", + " print(f\"- text: {doc_chunk.text!r}\")\n", " if doc_chunk.meta.origin:\n", " print(f\" file: {doc_chunk.meta.origin.filename}\")\n", " if doc_chunk.meta.headings:\n", diff --git a/docs/examples/rag_langchain.ipynb b/docs/examples/rag_langchain.ipynb index 37c4170..17fe8e6 100644 --- a/docs/examples/rag_langchain.ipynb +++ b/docs/examples/rag_langchain.ipynb @@ -341,7 +341,7 @@ "print(f\"Question:\\n{resp_dict['input']}\\n\\nAnswer:\\n{clipped_answer}\")\n", "for i, doc in enumerate(resp_dict[\"context\"]):\n", " print()\n", - " print(f\"Source {i+1}:\")\n", + " print(f\"Source {i + 1}:\")\n", " print(f\" text: {json.dumps(clip_text(doc.page_content, threshold=350))}\")\n", " for key in doc.metadata:\n", " if key != \"pk\":\n", diff --git a/docs/examples/rag_weaviate.ipynb b/docs/examples/rag_weaviate.ipynb index 7c020f4..627e892 100644 --- a/docs/examples/rag_weaviate.ipynb +++ b/docs/examples/rag_weaviate.ipynb @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "collapsed": true, "id": "u076oUSF_YUG" @@ -72,12 +72,11 @@ "%pip install rich\n", "%pip install torch\n", "\n", + "import logging\n", "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\")\n", "\n", - "import logging\n", - "\n", "# Suppress Weaviate client logs\n", "logging.getLogger(\"weaviate\").setLevel(logging.ERROR)" ] @@ -119,7 +118,7 @@ " device = torch.device(\"mps\")\n", " print(\"MPS GPU is enabled.\")\n", "else:\n", - " raise EnvironmentError(\n", + " raise OSError(\n", " \"No GPU or MPS device found. 
Please check your environment and ensure GPU or MPS support is configured.\"\n", " )" ] @@ -226,7 +225,6 @@ } ], "source": [ - "from docling.datamodel.document import ConversionResult\n", "from docling.document_converter import DocumentConverter\n", "\n", "# Instantiate the doc converter\n", @@ -345,7 +343,7 @@ "\n", " openai_api_key = os.getenv(openai_api_key_var)\n", " if not openai_api_key:\n", - " raise EnvironmentError(\n", + " raise OSError(\n", " f\"Environment variable '{openai_api_key_var}' is not set. \"\n", " \"Please define it before running this script.\"\n", " )" @@ -387,7 +385,6 @@ "outputs": [], "source": [ "import weaviate.classes.config as wc\n", - "from weaviate.classes.config import DataType, Property\n", "\n", "# Define the collection name\n", "collection_name = \"docling\"\n", diff --git a/docs/examples/run_md.py b/docs/examples/run_md.py index 46be97e..94de14b 100644 --- a/docs/examples/run_md.py +++ b/docs/examples/run_md.py @@ -25,9 +25,7 @@ def main(): document = mdb.convert() out_path = Path("scratch") - print( - f"Document {path} converted." f"\nSaved markdown output to: {str(out_path)}" - ) + print(f"Document {path} converted.\nSaved markdown output to: {out_path!s}") # Export Docling document format to markdowndoc: fn = os.path.basename(path) diff --git a/docs/examples/run_with_accelerator.py b/docs/examples/run_with_accelerator.py index 6e81e85..a538074 100644 --- a/docs/examples/run_with_accelerator.py +++ b/docs/examples/run_with_accelerator.py @@ -1,13 +1,10 @@ from pathlib import Path -from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( AcceleratorDevice, AcceleratorOptions, PdfPipelineOptions, - TesseractCliOcrOptions, - TesseractOcrOptions, ) from docling.datamodel.settings import settings from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/docs/examples/run_with_formats.py b/docs/examples/run_with_formats.py index 0eff248..38d7fff 100644 --- a/docs/examples/run_with_formats.py +++ b/docs/examples/run_with_formats.py @@ -63,7 +63,7 @@ def main(): out_path = Path("scratch") print( f"Document {res.input.file.name} converted." 
- f"\nSaved markdown output to: {str(out_path)}" + f"\nSaved markdown output to: {out_path!s}" ) _log.debug(res.document._export_to_indented_text(max_text_len=16)) # Export Docling document format to markdowndoc: diff --git a/docs/examples/tesseract_lang_detection.py b/docs/examples/tesseract_lang_detection.py index 0de0dd6..37859b9 100644 --- a/docs/examples/tesseract_lang_detection.py +++ b/docs/examples/tesseract_lang_detection.py @@ -4,7 +4,6 @@ from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import ( PdfPipelineOptions, TesseractCliOcrOptions, - TesseractOcrOptions, ) from docling.document_converter import DocumentConverter, PdfFormatOption diff --git a/docs/examples/translate.py b/docs/examples/translate.py index fa39b6d..229d545 100644 --- a/docs/examples/translate.py +++ b/docs/examples/translate.py @@ -2,9 +2,9 @@ import logging import time from pathlib import Path -from docling_core.types.doc import ImageRefMode, PictureItem, TableItem, TextItem +from docling_core.types.doc import ImageRefMode, TableItem, TextItem -from docling.datamodel.base_models import FigureElement, InputFormat, Table +from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.document_converter import DocumentConverter, PdfFormatOption @@ -15,7 +15,6 @@ IMAGE_RESOLUTION_SCALE = 2.0 # FIXME: put in your favorite translation code .... def translate(text: str, src: str = "en", dest: str = "de"): - _log.warning("!!! IMPLEMENT HERE YOUR FAVORITE TRANSLATION CODE!!!") # from googletrans import Translator @@ -52,10 +51,9 @@ def main(): } ) - start_time = time.time() - conv_res = doc_converter.convert(input_doc_path) conv_doc = conv_res.document + doc_filename = conv_res.input.file # Save markdown with embedded pictures in original text md_filename = output_dir / f"{doc_filename}-with-images-orig.md" diff --git a/docs/examples/visual_grounding.ipynb b/docs/examples/visual_grounding.ipynb index 4d091da..63200ed 100644 --- a/docs/examples/visual_grounding.ipynb +++ b/docs/examples/visual_grounding.ipynb @@ -432,7 +432,7 @@ "\n", "for i, doc in enumerate(resp_dict[\"context\"][:]):\n", " image_by_page = {}\n", - " print(f\"Source {i+1}:\")\n", + " print(f\"Source {i + 1}:\")\n", " print(f\" text: {json.dumps(clip_text(doc.page_content, threshold=350))}\")\n", " meta = DocMeta.model_validate(doc.metadata[\"dl_meta\"])\n", "\n", diff --git a/docs/examples/vlm_pipeline_api_model.py b/docs/examples/vlm_pipeline_api_model.py index 33fb72a..504cecc 100644 --- a/docs/examples/vlm_pipeline_api_model.py +++ b/docs/examples/vlm_pipeline_api_model.py @@ -10,7 +10,6 @@ from docling.datamodel.pipeline_options import ( ApiVlmOptions, ResponseFormat, VlmPipelineOptions, - granite_vision_vlm_ollama_conversion_options, ) from docling.document_converter import DocumentConverter, PdfFormatOption from docling.pipeline.vlm_pipeline import VlmPipeline diff --git a/poetry.lock b/poetry.lock index 2f142b8..22514b6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -692,6 +692,84 @@ traitlets = ">=4" [package.extras] test = ["pytest"] +[[package]] +name = "coverage" +version = "7.8.0" +description = "Code coverage measurement for Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "coverage-7.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2931f66991175369859b5fd58529cd4b73582461877ecfd859b6549869287ffe"}, + {file = "coverage-7.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:52a523153c568d2c0ef8826f6cc23031dc86cffb8c6aeab92c4ff776e7951b28"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c8a5c139aae4c35cbd7cadca1df02ea8cf28a911534fc1b0456acb0b14234f3"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5a26c0c795c3e0b63ec7da6efded5f0bc856d7c0b24b2ac84b4d1d7bc578d676"}, + {file = "coverage-7.8.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821f7bcbaa84318287115d54becb1915eece6918136c6f91045bb84e2f88739d"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a321c61477ff8ee705b8a5fed370b5710c56b3a52d17b983d9215861e37b642a"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ed2144b8a78f9d94d9515963ed273d620e07846acd5d4b0a642d4849e8d91a0c"}, + {file = "coverage-7.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:042e7841a26498fff7a37d6fda770d17519982f5b7d8bf5278d140b67b61095f"}, + {file = "coverage-7.8.0-cp310-cp310-win32.whl", hash = "sha256:f9983d01d7705b2d1f7a95e10bbe4091fabc03a46881a256c2787637b087003f"}, + {file = "coverage-7.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:5a570cd9bd20b85d1a0d7b009aaf6c110b52b5755c17be6962f8ccd65d1dbd23"}, + {file = "coverage-7.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7ac22a0bb2c7c49f441f7a6d46c9c80d96e56f5a8bc6972529ed43c8b694e27"}, + {file = "coverage-7.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bf13d564d310c156d1c8e53877baf2993fb3073b2fc9f69790ca6a732eb4bfea"}, + {file = "coverage-7.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5761c70c017c1b0d21b0815a920ffb94a670c8d5d409d9b38857874c21f70d7"}, + {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ff52d790c7e1628241ffbcaeb33e07d14b007b6eb00a19320c7b8a7024c040"}, + {file = "coverage-7.8.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d39fc4817fd67b3915256af5dda75fd4ee10621a3d484524487e33416c6f3543"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b44674870709017e4b4036e3d0d6c17f06a0e6d4436422e0ad29b882c40697d2"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8f99eb72bf27cbb167b636eb1726f590c00e1ad375002230607a844d9e9a2318"}, + {file = "coverage-7.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b571bf5341ba8c6bc02e0baeaf3b061ab993bf372d982ae509807e7f112554e9"}, + {file = "coverage-7.8.0-cp311-cp311-win32.whl", hash = "sha256:e75a2ad7b647fd8046d58c3132d7eaf31b12d8a53c0e4b21fa9c4d23d6ee6d3c"}, + {file = "coverage-7.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:3043ba1c88b2139126fc72cb48574b90e2e0546d4c78b5299317f61b7f718b78"}, + {file = "coverage-7.8.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bbb5cc845a0292e0c520656d19d7ce40e18d0e19b22cb3e0409135a575bf79fc"}, + {file = "coverage-7.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4dfd9a93db9e78666d178d4f08a5408aa3f2474ad4d0e0378ed5f2ef71640cb6"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f017a61399f13aa6d1039f75cd467be388d157cd81f1a119b9d9a68ba6f2830d"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:0915742f4c82208ebf47a2b154a5334155ed9ef9fe6190674b8a46c2fb89cb05"}, + {file = "coverage-7.8.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a40fcf208e021eb14b0fac6bdb045c0e0cab53105f93ba0d03fd934c956143a"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a1f406a8e0995d654b2ad87c62caf6befa767885301f3b8f6f73e6f3c31ec3a6"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:77af0f6447a582fdc7de5e06fa3757a3ef87769fbb0fdbdeba78c23049140a47"}, + {file = "coverage-7.8.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f2d32f95922927186c6dbc8bc60df0d186b6edb828d299ab10898ef3f40052fe"}, + {file = "coverage-7.8.0-cp312-cp312-win32.whl", hash = "sha256:769773614e676f9d8e8a0980dd7740f09a6ea386d0f383db6821df07d0f08545"}, + {file = "coverage-7.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:e5d2b9be5b0693cf21eb4ce0ec8d211efb43966f6657807f6859aab3814f946b"}, + {file = "coverage-7.8.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5ac46d0c2dd5820ce93943a501ac5f6548ea81594777ca585bf002aa8854cacd"}, + {file = "coverage-7.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:771eb7587a0563ca5bb6f622b9ed7f9d07bd08900f7589b4febff05f469bea00"}, + {file = "coverage-7.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42421e04069fb2cbcbca5a696c4050b84a43b05392679d4068acbe65449b5c64"}, + {file = "coverage-7.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:554fec1199d93ab30adaa751db68acec2b41c5602ac944bb19187cb9a41a8067"}, + {file = "coverage-7.8.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aaeb00761f985007b38cf463b1d160a14a22c34eb3f6a39d9ad6fc27cb73008"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:581a40c7b94921fffd6457ffe532259813fc68eb2bdda60fa8cc343414ce3733"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f319bae0321bc838e205bf9e5bc28f0a3165f30c203b610f17ab5552cff90323"}, + {file = "coverage-7.8.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:04bfec25a8ef1c5f41f5e7e5c842f6b615599ca8ba8391ec33a9290d9d2db3a3"}, + {file = "coverage-7.8.0-cp313-cp313-win32.whl", hash = "sha256:dd19608788b50eed889e13a5d71d832edc34fc9dfce606f66e8f9f917eef910d"}, + {file = "coverage-7.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:a9abbccd778d98e9c7e85038e35e91e67f5b520776781d9a1e2ee9d400869487"}, + {file = "coverage-7.8.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:18c5ae6d061ad5b3e7eef4363fb27a0576012a7447af48be6c75b88494c6cf25"}, + {file = "coverage-7.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:95aa6ae391a22bbbce1b77ddac846c98c5473de0372ba5c463480043a07bff42"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e013b07ba1c748dacc2a80e69a46286ff145935f260eb8c72df7185bf048f502"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d766a4f0e5aa1ba056ec3496243150698dc0481902e2b8559314368717be82b1"}, + {file = "coverage-7.8.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad80e6b4a0c3cb6f10f29ae4c60e991f424e6b14219d46f1e7d442b938ee68a4"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = 
"sha256:b87eb6fc9e1bb8f98892a2458781348fa37e6925f35bb6ceb9d4afd54ba36c73"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:d1ba00ae33be84066cfbe7361d4e04dec78445b2b88bdb734d0d1cbab916025a"}, + {file = "coverage-7.8.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f3c38e4e5ccbdc9198aecc766cedbb134b2d89bf64533973678dfcf07effd883"}, + {file = "coverage-7.8.0-cp313-cp313t-win32.whl", hash = "sha256:379fe315e206b14e21db5240f89dc0774bdd3e25c3c58c2c733c99eca96f1ada"}, + {file = "coverage-7.8.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2e4b6b87bb0c846a9315e3ab4be2d52fac905100565f4b92f02c445c8799e257"}, + {file = "coverage-7.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fa260de59dfb143af06dcf30c2be0b200bed2a73737a8a59248fcb9fa601ef0f"}, + {file = "coverage-7.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:96121edfa4c2dfdda409877ea8608dd01de816a4dc4a0523356067b305e4e17a"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b8af63b9afa1031c0ef05b217faa598f3069148eeee6bb24b79da9012423b82"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:89b1f4af0d4afe495cd4787a68e00f30f1d15939f550e869de90a86efa7e0814"}, + {file = "coverage-7.8.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94ec0be97723ae72d63d3aa41961a0b9a6f5a53ff599813c324548d18e3b9e8c"}, + {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:8a1d96e780bdb2d0cbb297325711701f7c0b6f89199a57f2049e90064c29f6bd"}, + {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f1d8a2a57b47142b10374902777e798784abf400a004b14f1b0b9eaf1e528ba4"}, + {file = "coverage-7.8.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cf60dd2696b457b710dd40bf17ad269d5f5457b96442f7f85722bdb16fa6c899"}, + {file = "coverage-7.8.0-cp39-cp39-win32.whl", hash = "sha256:be945402e03de47ba1872cd5236395e0f4ad635526185a930735f66710e1bd3f"}, + {file = "coverage-7.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:90e7fbc6216ecaffa5a880cdc9c77b7418c1dcb166166b78dbc630d07f278cc3"}, + {file = "coverage-7.8.0-pp39.pp310.pp311-none-any.whl", hash = "sha256:b8194fb8e50d556d5849753de991d390c5a1edeeba50f68e3a9253fbd8bf8ccd"}, + {file = "coverage-7.8.0-py3-none-any.whl", hash = "sha256:dbf364b4c5e7bae9250528167dfe40219b62e2d573c854d74be213e1e52069f7"}, + {file = "coverage-7.8.0.tar.gz", hash = "sha256:7a3d62b3b03b4b6fd41a085f3574874cf946cb4604d2b4d3e8dca8cd570ca501"}, +] + +[package.dependencies] +tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} + +[package.extras] +toml = ["tomli"] + [[package]] name = "cryptography" version = "43.0.3" @@ -5073,6 +5151,24 @@ tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-cov" +version = "6.1.1" +description = "Pytest plugin for measuring coverage." 
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pytest_cov-6.1.1-py3-none-any.whl", hash = "sha256:bddf29ed2d0ab6f4df17b4c55b0a657287db8684af9c42ea546b21b1041b3dde"}, + {file = "pytest_cov-6.1.1.tar.gz", hash = "sha256:46935f7aaefba760e716c2ebfbe1c216240b9592966e7da99ea8292d4d3e2a0a"}, +] + +[package.dependencies] +coverage = {version = ">=7.5", extras = ["toml"]} +pytest = ">=4.6" + +[package.extras] +testing = ["fields", "hunter", "process-tests", "pytest-xdist", "virtualenv"] + [[package]] name = "pytest-xdist" version = "3.6.1" @@ -7882,4 +7978,4 @@ vlm = ["accelerate", "transformers", "transformers"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "d2a8f7997b9ffb249ad26ba492b766d580bdb0072d50e76b0afd92496e983e96" +content-hash = "b36037ec17dc4b6d5197a2f63a1367e05bf888b4fa97e2e2e8c29c217741d69c" diff --git a/pyproject.toml b/pyproject.toml index 5091afc..148f52b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,6 +110,8 @@ ipywidgets = "^8.1.5" nbqa = "^1.9.0" types-openpyxl = "^3.1.5.20241114" types-tqdm = "^4.67.0.20241221" +coverage = "^7.6.2" +pytest-cov = "^6.0.0" [tool.poetry.group.docs.dependencies] mkdocs-material = "^9.5.40" @@ -164,15 +166,82 @@ docling-tools = "docling.cli.tools:app" requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" -[tool.black] +[tool.ruff] +target-version = "py39" line-length = 88 -target-version = ["py39"] -include = '\.pyi?$' +respect-gitignore = true -[tool.isort] -profile = "black" -line_length = 88 -py_version = 39 +# extend-exclude = [ +# "tests", +# ] + +[tool.ruff.format] +skip-magic-trailing-comma = false + +[tool.ruff.lint] +select = [ + # "B", # flake8-bugbear + "C", # flake8-comprehensions + "C9", # mccabe + # "D", # flake8-docstrings + "E", # pycodestyle errors (default) + "F", # pyflakes (default) + "I", # isort + "PD", # pandas-vet + "PIE", # pie + # "PTH", # pathlib + "Q", # flake8-quotes + # "RET", # return + "RUF", # Enable all ruff-specific checks + # "SIM", # simplify + "S307", # eval + # "T20", # (disallow print statements) keep debugging statements out of the codebase + "W", # pycodestyle warnings + "ASYNC", # async + "UP", # pyupgrade +] + +ignore = [ + "C408", # Unnecessary `dict()` call (rewrite as a literal) + "E501", # Line too long, handled by ruff formatter + "D107", # "Missing docstring in __init__", + "F401", # imported but unused; consider using `importlib.util.find_spec` to test for " + "F811", # "redefinition of the same function" + "PL", # Pylint + "RUF012", # Mutable Class Attributes + "UP006", # List vs list, etc + "UP007", # Option and Union + "UP035", # `typing.Set` is deprecated, use `set` instead" +] + +#extend-select = [] + +[tool.ruff.lint.pep8-naming] +classmethod-decorators = [ + # Allow Pydantic's `@validator` decorator to trigger class method treatment. 
+ "pydantic.validator", +] + +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401"] +"tests/*.py" = ["ASYNC"] # Disable ASYNC check for tests + +[tool.ruff.lint.mccabe] +max-complexity = 20 + +# [tool.ruff.lint.isort.sections] +# "docling" = ["docling_core", "docling_ibm_models", "docling_parse"] + +[tool.ruff.lint.isort] +combine-as-imports = true +# section-order = [ +# "future", +# "standard-library", +# "third-party", +# "docling", +# "first-party", +# "local-folder", +# ] [tool.mypy] pretty = true @@ -200,10 +269,6 @@ module = [ ] ignore_missing_imports = true -[tool.flake8] -max-line-length = 88 -extend-ignore = ["E203", "E501"] - [tool.semantic_release] # for default values check: # https://github.com/python-semantic-release/python-semantic-release/blob/v7.32.2/semantic_release/defaults.cfg diff --git a/tests/test_backend_asciidoc.py b/tests/test_backend_asciidoc.py index 4574a22..fc047ba 100644 --- a/tests/test_backend_asciidoc.py +++ b/tests/test_backend_asciidoc.py @@ -19,7 +19,6 @@ def _get_backend(fname): def test_asciidocs_examples(): - fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc")) for fname in fnames: @@ -38,8 +37,8 @@ def test_asciidocs_examples(): print("\n\n", pred_mddoc) if os.path.exists(gname): - with open(gname, "r") as fr: - true_mddoc = fr.read() + with open(gname) as fr: + fr.read() # assert pred_mddoc == true_mddoc, "pred_mddoc!=true_mddoc for asciidoc" else: diff --git a/tests/test_backend_csv.py b/tests/test_backend_csv.py index 2eee27b..d929ae1 100644 --- a/tests/test_backend_csv.py +++ b/tests/test_backend_csv.py @@ -1,5 +1,3 @@ -import json -import os from pathlib import Path from pytest import warns @@ -15,22 +13,19 @@ GENERATE = GEN_TEST_DATA def get_csv_paths(): - # Define the directory you want to search - directory = Path(f"./tests/data/csv/") + directory = Path("./tests/data/csv/") # List all CSV files in the directory and its subdirectories return sorted(directory.rglob("*.csv")) def get_csv_path(name: str): - # Return the matching CSV file path return Path(f"./tests/data/csv/{name}.csv") def get_converter(): - converter = DocumentConverter(allowed_formats=[InputFormat.CSV]) return converter @@ -55,9 +50,9 @@ def test_e2e_valid_csv_conversions(): pred_itxt: str = doc._export_to_indented_text( max_text_len=70, explicit_tables=False ) - assert verify_export( - pred_itxt, str(gt_path) + ".itxt" - ), "export to indented-text" + assert verify_export(pred_itxt, str(gt_path) + ".itxt"), ( + "export to indented-text" + ) assert verify_document( pred_doc=doc, diff --git a/tests/test_backend_docling_parse.py b/tests/test_backend_docling_parse.py index 3c21479..d6f804c 100644 --- a/tests/test_backend_docling_parse.py +++ b/tests/test_backend_docling_parse.py @@ -32,7 +32,7 @@ def test_text_cell_counts(): doc_backend = _get_backend(pdf_doc) - for page_index in range(0, doc_backend.page_count()): + for page_index in range(doc_backend.page_count()): last_cell_count = None for i in range(10): page_backend: DoclingParsePageBackend = doc_backend.load_page(0) @@ -42,9 +42,9 @@ def test_text_cell_counts(): last_cell_count = len(cells) if len(cells) != last_cell_count: - assert ( - False - ), "Loading page multiple times yielded non-identical text cell counts" + assert False, ( + "Loading page multiple times yielded non-identical text cell counts" + ) last_cell_count = len(cells) @@ -66,7 +66,7 @@ def test_crop_page_image(test_doc_path): page_backend: DoclingParsePageBackend = doc_backend.load_page(0) # Crop out "Figure 1" from the DocLayNet 
paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
diff --git a/tests/test_backend_docling_parse_v2.py b/tests/test_backend_docling_parse_v2.py
index ee0e5c7..972f3b9 100644
--- a/tests/test_backend_docling_parse_v2.py
+++ b/tests/test_backend_docling_parse_v2.py
@@ -31,7 +31,7 @@ def test_text_cell_counts():
     doc_backend = _get_backend(pdf_doc)
-    for page_index in range(0, doc_backend.page_count()):
+    for page_index in range(doc_backend.page_count()):
         last_cell_count = None
         for i in range(10):
             page_backend: DoclingParseV2PageBackend = doc_backend.load_page(0)
@@ -41,9 +41,9 @@ def test_text_cell_counts():
                 last_cell_count = len(cells)
             if len(cells) != last_cell_count:
-                assert (
-                    False
-                ), "Loading page multiple times yielded non-identical text cell counts"
+                assert False, (
+                    "Loading page multiple times yielded non-identical text cell counts"
+                )
                 last_cell_count = len(cells)
@@ -65,7 +65,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: DoclingParseV2PageBackend = doc_backend.load_page(0)
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
diff --git a/tests/test_backend_docling_parse_v4.py b/tests/test_backend_docling_parse_v4.py
index fcb551e..35c4eab 100644
--- a/tests/test_backend_docling_parse_v4.py
+++ b/tests/test_backend_docling_parse_v4.py
@@ -31,7 +31,7 @@ def test_text_cell_counts():
     doc_backend = _get_backend(pdf_doc)
-    for page_index in range(0, doc_backend.page_count()):
+    for page_index in range(doc_backend.page_count()):
         last_cell_count = None
         for i in range(10):
             page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)
@@ -41,9 +41,9 @@ def test_text_cell_counts():
                 last_cell_count = len(cells)
             if len(cells) != last_cell_count:
-                assert (
-                    False
-                ), "Loading page multiple times yielded non-identical text cell counts"
+                assert False, (
+                    "Loading page multiple times yielded non-identical text cell counts"
+                )
                 last_cell_count = len(cells)
@@ -65,7 +65,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: DoclingParseV4PageBackend = doc_backend.load_page(0)
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
diff --git a/tests/test_backend_html.py b/tests/test_backend_html.py
index 5f5e740..18254a7 100644
--- a/tests/test_backend_html.py
+++ b/tests/test_backend_html.py
@@ -105,7 +105,6 @@ def test_ordered_lists():
 def get_html_paths():
-
     # Define the directory you want to search
     directory = Path("./tests/data/html/")
@@ -115,14 +114,12 @@
 def get_converter():
-
     converter = DocumentConverter(allowed_formats=[InputFormat.HTML])
     return converter
 def test_e2e_html_conversions():
-
     html_paths = get_html_paths()
     converter = get_converter()
@@ -138,15 +135,15 @@
         doc: DoclingDocument = conv_result.document
         pred_md: str = doc.export_to_markdown()
-        assert verify_export(
-            pred_md, str(gt_path) + ".md", generate=GENERATE
-        ), "export to md"
+        assert verify_export(pred_md, str(gt_path) + ".md", generate=GENERATE), (
+            "export to md"
+        )
         pred_itxt: str = doc._export_to_indented_text(
             max_text_len=70, explicit_tables=False
         )
-        assert verify_export(
-            pred_itxt, str(gt_path) + ".itxt", generate=GENERATE
-        ), "export to indented-text"
+        assert verify_export(pred_itxt, str(gt_path) + ".itxt", generate=GENERATE), (
+            "export to indented-text"
+        )
         assert verify_document(doc, str(gt_path) + ".json", GENERATE)
diff --git a/tests/test_backend_jats.py b/tests/test_backend_jats.py
index d209431..a4373be 100644
--- a/tests/test_backend_jats.py
+++ b/tests/test_backend_jats.py
@@ -15,7 +15,7 @@
 GENERATE = GEN_TEST_DATA
 def get_pubmed_paths():
-    directory = Path(os.path.dirname(__file__) + f"/data/pubmed/")
+    directory = Path(os.path.dirname(__file__) + "/data/pubmed/")
     xml_files = sorted(directory.rglob("*.xml"))
     return xml_files
@@ -47,9 +47,9 @@ def test_e2e_pubmed_conversions(use_stream=False):
         pred_itxt: str = doc._export_to_indented_text(
             max_text_len=70, explicit_tables=False
         )
-        assert verify_export(
-            pred_itxt, str(gt_path) + ".itxt"
-        ), "export to indented-text"
+        assert verify_export(pred_itxt, str(gt_path) + ".itxt"), (
+            "export to indented-text"
+        )
         assert verify_document(doc, str(gt_path) + ".json", GENERATE), "export to json"
diff --git a/tests/test_backend_msexcel.py b/tests/test_backend_msexcel.py
index 0604429..65f636e 100644
--- a/tests/test_backend_msexcel.py
+++ b/tests/test_backend_msexcel.py
@@ -17,7 +17,6 @@
 GENERATE = GEN_TEST_DATA
 def get_xlsx_paths():
-
     # Define the directory you want to search
     directory = Path("./tests/data/xlsx/")
@@ -27,7 +26,6 @@
 def get_converter():
-
     converter = DocumentConverter(allowed_formats=[InputFormat.XLSX])
     return converter
@@ -65,13 +63,13 @@ def test_e2e_xlsx_conversions(documents) -> None:
         pred_itxt: str = doc._export_to_indented_text(
             max_text_len=70, explicit_tables=False
         )
-        assert verify_export(
-            pred_itxt, str(gt_path) + ".itxt"
-        ), "export to indented-text"
+        assert verify_export(pred_itxt, str(gt_path) + ".itxt"), (
+            "export to indented-text"
+        )
-        assert verify_document(
-            doc, str(gt_path) + ".json", GENERATE
-        ), "document document"
+        assert verify_document(doc, str(gt_path) + ".json", GENERATE), (
+            "document document"
+        )
 def test_pages(documents) -> None:
@@ -81,7 +79,7 @@
     documents: The paths and converted documents.
     """
     # number of pages from the backend method
-    path = [item for item in get_xlsx_paths() if item.stem == "test-01"][0]
+    path = next(item for item in get_xlsx_paths() if item.stem == "test-01")
     in_doc = InputDocument(
         path_or_stream=path,
         format=InputFormat.XLSX,
@@ -92,7 +90,7 @@
     assert backend.page_count() == 3
     # number of pages from the converted document
-    doc = [item for path, item in documents if path.stem == "test-01"][0]
+    doc = next(item for path, item in documents if path.stem == "test-01")
     assert len(doc.pages) == 3
     # page sizes as number of cells
diff --git a/tests/test_backend_msword.py b/tests/test_backend_msword.py
index 5c43ccf..c50e071 100644
--- a/tests/test_backend_msword.py
+++ b/tests/test_backend_msword.py
@@ -1,4 +1,3 @@
-import os
 from pathlib import Path
 from docling.backend.msword_backend import MsWordDocumentBackend
@@ -43,7 +42,6 @@ def test_heading_levels():
 def get_docx_paths():
-
     # Define the directory you want to search
     directory = Path("./tests/data/docx/")
@@ -53,14 +51,12 @@
 def get_converter():
-
     converter = DocumentConverter(allowed_formats=[InputFormat.DOCX])
     return converter
 def test_e2e_docx_conversions():
-
     docx_paths = get_docx_paths()
     converter = get_converter()
@@ -76,20 +72,20 @@
         doc: DoclingDocument = conv_result.document
         pred_md: str = doc.export_to_markdown()
-        assert verify_export(
-            pred_md, str(gt_path) + ".md", generate=GENERATE
-        ), "export to md"
+        assert verify_export(pred_md, str(gt_path) + ".md", generate=GENERATE), (
+            "export to md"
+        )
         pred_itxt: str = doc._export_to_indented_text(
             max_text_len=70, explicit_tables=False
         )
-        assert verify_export(
-            pred_itxt, str(gt_path) + ".itxt", generate=GENERATE
-        ), "export to indented-text"
+        assert verify_export(pred_itxt, str(gt_path) + ".itxt", generate=GENERATE), (
+            "export to indented-text"
+        )
-        assert verify_document(
-            doc, str(gt_path) + ".json", generate=GENERATE
-        ), "document document"
+        assert verify_document(doc, str(gt_path) + ".json", generate=GENERATE), (
+            "document document"
+        )
         if docx_path.name == "word_tables.docx":
             pred_html: str = doc.export_to_html()
diff --git a/tests/test_backend_patent_uspto.py b/tests/test_backend_patent_uspto.py
index aebc01d..ace6d3a 100644
--- a/tests/test_backend_patent_uspto.py
+++ b/tests/test_backend_patent_uspto.py
@@ -109,27 +109,27 @@ def test_patent_groundtruth(patents, groundtruth):
         md_name = path.stem + ".md"
         if md_name in gt_names:
             pred_md = doc.export_to_markdown()
-            assert (
-                pred_md == gt_names[md_name]
-            ), f"Markdown file mismatch against groundtruth {md_name}"
+            assert pred_md == gt_names[md_name], (
+                f"Markdown file mismatch against groundtruth {md_name}"
+            )
         json_path = path.with_suffix(".json")
         if json_path.stem in gt_names:
-            assert verify_document(
-                doc, str(json_path), GENERATE
-            ), f"JSON file mismatch against groundtruth {json_path}"
+            assert verify_document(doc, str(json_path), GENERATE), (
+                f"JSON file mismatch against groundtruth {json_path}"
+            )
         itxt_name = path.stem + ".itxt"
         if itxt_name in gt_names:
             pred_itxt = doc._export_to_indented_text()
-            assert (
-                pred_itxt == gt_names[itxt_name]
-            ), f"Indented text file mismatch against groundtruth {itxt_name}"
+            assert pred_itxt == gt_names[itxt_name], (
+                f"Indented text file mismatch against groundtruth {itxt_name}"
+            )
 def test_tables(tables):
     """Test the table parser."""
     # CHECK table in file tables_20180000016.xml
     file_name = "tables_ipa20180000016.xml"
-    file_table = [item[1] for item in tables if item[0].name == file_name][0]
+    file_table = next(item[1] for item in tables if item[0].name == file_name)
     assert file_table.num_rows == 13
     assert file_table.num_cols == 10
     assert len(file_table.table_cells) == 130
@@ -140,7 +140,7 @@ def test_patent_uspto_ice(patents):
     # CHECK application doc number 20200022300
     file_name = "ipa20200022300.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
@@ -278,7 +278,7 @@ def test_patent_uspto_ice(patents):
     # CHECK application doc number 20180000016 for HTML entities, level 2 headings, tables
     file_name = "ipa20180000016.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
@@ -348,7 +348,7 @@ def test_patent_uspto_ice(patents):
     # CHECK application doc number 20110039701 for complex long tables
     file_name = "ipa20110039701.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     assert doc.name == file_name
     assert len(doc.tables) == 17
@@ -358,7 +358,7 @@ def test_patent_uspto_grant_v2(patents):
     # CHECK application doc number 06442728
     file_name = "pg06442728.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
@@ -376,12 +376,12 @@ def test_patent_uspto_grant_v2(patents):
     assert isinstance(texts[2], TextItem)
     assert texts[2].text == (
         "An interleaver receives incoming data frames of size N. The interleaver "
-        "indexes the elements of the frame with an N₁×N₂ index array. The interleaver "
+        "indexes the elements of the frame with an N₁×N₂ index array. The interleaver "  # noqa: RUF001
         "then effectively rearranges (permutes) the data by permuting the rows of the "
-        "index array. The interleaver employs the equation I(j,k)=I(j,αjk+βj)modP) to "
+        "index array. The interleaver employs the equation I(j,k)=I(j,αjk+βj)modP) to "  # noqa: RUF001
         "permute the columns (indexed by k) of each row (indexed by j). P is at least "
         "equal to N₂, βj is a constant which may be different for each row, and each "
-        "αj is a relative prime number relative to P. After permuting, the "
+        "αj is a relative prime number relative to P. After permuting, the "  # noqa: RUF001
         "interleaver outputs the data in a different order than received (e.g., "
         "receives sequentially row by row, outputs sequentially each column by column)."
     )
@@ -402,7 +402,7 @@ def test_patent_uspto_app_v1(patents):
     # CHECK application doc number 20010031492
     file_name = "pa20010031492.xml"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
@@ -432,7 +432,7 @@ def test_patent_uspto_grant_aps(patents):
     # CHECK application doc number 057006474
     file_name = "pftaps057006474.txt"
-    doc = [item[1] for item in patents if item[0].name == file_name][0]
+    doc = next(item[1] for item in patents if item[0].name == file_name)
     if GENERATE:
         _generate_groundtruth(doc, Path(file_name).stem)
diff --git a/tests/test_backend_pdfium.py b/tests/test_backend_pdfium.py
index 10a2b9e..317cdee 100644
--- a/tests/test_backend_pdfium.py
+++ b/tests/test_backend_pdfium.py
@@ -32,7 +32,7 @@ def test_text_cell_counts():
     doc_backend = _get_backend(pdf_doc)
-    for page_index in range(0, doc_backend.page_count()):
+    for page_index in range(doc_backend.page_count()):
         last_cell_count = None
         for i in range(10):
             page_backend: PyPdfiumPageBackend = doc_backend.load_page(0)
@@ -42,9 +42,9 @@ def test_text_cell_counts():
                 last_cell_count = len(cells)
             if len(cells) != last_cell_count:
-                assert (
-                    False
-                ), "Loading page multiple times yielded non-identical text cell counts"
+                assert False, (
+                    "Loading page multiple times yielded non-identical text cell counts"
+                )
                 last_cell_count = len(cells)
@@ -66,7 +66,7 @@ def test_crop_page_image(test_doc_path):
     page_backend: PyPdfiumPageBackend = doc_backend.load_page(0)
     # Crop out "Figure 1" from the DocLayNet paper
-    im = page_backend.get_page_image(
+    page_backend.get_page_image(
         scale=2, cropbox=BoundingBox(l=317, t=246, r=574, b=527)
     )
     # im.show()
diff --git a/tests/test_backend_pptx.py b/tests/test_backend_pptx.py
index 947e9e6..4f73c87 100644
--- a/tests/test_backend_pptx.py
+++ b/tests/test_backend_pptx.py
@@ -1,4 +1,3 @@
-import os
 from pathlib import Path
 from docling.datamodel.base_models import InputFormat
@@ -12,7 +11,6 @@
 GENERATE = GEN_TEST_DATA
 def get_pptx_paths():
-
     # Define the directory you want to search
     directory = Path("./tests/data/pptx/")
@@ -22,14 +20,12 @@
 def get_converter():
-
     converter = DocumentConverter(allowed_formats=[InputFormat.PPTX])
     return converter
 def test_e2e_pptx_conversions():
-
     pptx_paths = get_pptx_paths()
     converter = get_converter()
@@ -50,10 +46,10 @@
         pred_itxt: str = doc._export_to_indented_text(
             max_text_len=70, explicit_tables=False
         )
-        assert verify_export(
-            pred_itxt, str(gt_path) + ".itxt"
-        ), "export to indented-text"
+        assert verify_export(pred_itxt, str(gt_path) + ".itxt"), (
+            "export to indented-text"
+        )
-        assert verify_document(
-            doc, str(gt_path) + ".json", GENERATE
-        ), "document document"
+        assert verify_document(doc, str(gt_path) + ".json", GENERATE), (
+            "document document"
+        )
diff --git a/tests/test_code_formula.py b/tests/test_code_formula.py
index 085e094..e5d52da 100644
--- a/tests/test_code_formula.py
+++ b/tests/test_code_formula.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from docling_core.types.doc import CodeItem, TextItem
 from docling_core.types.doc.labels import CodeLanguageLabel, DocItemLabel
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@@ -12,7 +11,6 @@
 from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
 def get_converter():
-
     pipeline_options = PdfPipelineOptions()
     pipeline_options.generate_page_images = True
diff --git a/tests/test_document_picture_classifier.py b/tests/test_document_picture_classifier.py
index 2ac1da9..5dc5e92 100644
--- a/tests/test_document_picture_classifier.py
+++ b/tests/test_document_picture_classifier.py
@@ -2,7 +2,6 @@
 from pathlib import Path
 from docling_core.types.doc import PictureClassificationData
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import PdfPipelineOptions
@@ -11,7 +10,6 @@
 from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
 def get_converter():
-
     pipeline_options = PdfPipelineOptions()
     pipeline_options.generate_page_images = True
@@ -49,32 +47,32 @@ def test_picture_classifier():
     res = results[0]
     assert len(res.annotations) == 1
-    assert type(res.annotations[0]) == PictureClassificationData
+    assert isinstance(res.annotations[0], PictureClassificationData)
     classification_data = res.annotations[0]
     assert classification_data.provenance == "DocumentPictureClassifier"
-    assert (
-        len(classification_data.predicted_classes) == 16
-    ), "Number of predicted classes is not equal to 16"
+    assert len(classification_data.predicted_classes) == 16, (
+        "Number of predicted classes is not equal to 16"
+    )
     confidences = [pred.confidence for pred in classification_data.predicted_classes]
-    assert confidences == sorted(
-        confidences, reverse=True
-    ), "Predictions are not sorted in descending order of confidence"
-    assert (
-        classification_data.predicted_classes[0].class_name == "bar_chart"
-    ), "The prediction is wrong for the bar chart image."
+    assert confidences == sorted(confidences, reverse=True), (
+        "Predictions are not sorted in descending order of confidence"
+    )
+    assert classification_data.predicted_classes[0].class_name == "bar_chart", (
+        "The prediction is wrong for the bar chart image."
+    )
     res = results[1]
     assert len(res.annotations) == 1
-    assert type(res.annotations[0]) == PictureClassificationData
+    assert isinstance(res.annotations[0], PictureClassificationData)
     classification_data = res.annotations[0]
     assert classification_data.provenance == "DocumentPictureClassifier"
-    assert (
-        len(classification_data.predicted_classes) == 16
-    ), "Number of predicted classes is not equal to 16"
+    assert len(classification_data.predicted_classes) == 16, (
+        "Number of predicted classes is not equal to 16"
+    )
     confidences = [pred.confidence for pred in classification_data.predicted_classes]
-    assert confidences == sorted(
-        confidences, reverse=True
-    ), "Predictions are not sorted in descending order of confidence"
-    assert (
-        classification_data.predicted_classes[0].class_name == "map"
-    ), "The prediction is wrong for the bar chart image."
+    assert confidences == sorted(confidences, reverse=True), (
+        "Predictions are not sorted in descending order of confidence"
+    )
+    assert classification_data.predicted_classes[0].class_name == "map", (
+        "The prediction is wrong for the bar chart image."
+    )
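The classifier test above also swaps `type(x) == T` for `isinstance(x, T)` (pycodestyle rule E721). `isinstance` accepts subclasses, which is almost always the intended semantics for such checks. A small sketch with toy classes (both class names are invented for illustration):

class Annotation: ...
class PictureAnnotation(Annotation): ...

item = PictureAnnotation()

print(type(item) == Annotation)      # False: exact type match only
print(isinstance(item, Annotation))  # True: subclasses are accepted too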
diff --git a/tests/test_e2e_conversion.py b/tests/test_e2e_conversion.py
index 590558f..5dc2e89 100644
--- a/tests/test_e2e_conversion.py
+++ b/tests/test_e2e_conversion.py
@@ -1,7 +1,6 @@
 from pathlib import Path
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import AcceleratorDevice, PdfPipelineOptions
@@ -15,7 +14,6 @@
 GENERATE_V2 = GEN_TEST_DATA
 def get_pdf_paths():
-
     # Define the directory you want to search
     directory = Path("./tests/data/pdf/")
@@ -25,7 +23,6 @@
 def get_converter():
-
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = False
     pipeline_options.do_table_structure = True
@@ -45,7 +42,6 @@ def get_converter():
 def test_e2e_pdfs_conversions():
-
     pdf_paths = get_pdf_paths()
     converter = get_converter()
diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py
index 985a625..63570d0 100644
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@@ -3,7 +3,6 @@
 from pathlib import Path
 from typing import List
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.document import ConversionResult
 from docling.datamodel.pipeline_options import (
diff --git a/tests/test_input_doc.py b/tests/test_input_doc.py
index 946ad06..94a6887 100644
--- a/tests/test_input_doc.py
+++ b/tests/test_input_doc.py
@@ -12,10 +12,9 @@
 from docling.document_converter import PdfFormatOption
 def test_in_doc_from_valid_path():
-
     test_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
     doc = _make_input_doc(test_doc_path)
-    assert doc.valid == True
+    assert doc.valid is True
 def test_in_doc_from_invalid_path():
@@ -23,29 +22,26 @@
     doc = _make_input_doc(test_doc_path)
-    assert doc.valid == False
+    assert doc.valid is False
 def test_in_doc_from_valid_buf():
-
     buf = BytesIO(Path("./tests/data/pdf/2206.01062.pdf").open("rb").read())
     stream = DocumentStream(name="my_doc.pdf", stream=buf)
     doc = _make_input_doc_from_stream(stream)
-    assert doc.valid == True
+    assert doc.valid is True
 def test_in_doc_from_invalid_buf():
-
     buf = BytesIO(b"")
     stream = DocumentStream(name="my_doc.pdf", stream=buf)
     doc = _make_input_doc_from_stream(stream)
-    assert doc.valid == False
+    assert doc.valid is False
 def test_image_in_pdf_backend():
-
     in_doc = InputDocument(
         path_or_stream=Path("tests/data/2305.03393v1-pg9-img.png"),
         format=InputFormat.IMAGE,
@@ -76,7 +72,6 @@
 def test_in_doc_with_page_range():
-
     test_doc_path = Path("./tests/data/pdf/2206.01062.pdf")
     limits = DocumentLimits()
     limits.page_range = (1, 10)
@@ -87,7 +82,7 @@
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == True
+    assert doc.valid is True
     limits.page_range = (9, 9)
@@ -97,7 +92,7 @@
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == True
+    assert doc.valid is True
     limits.page_range = (11, 12)
@@ -107,7 +102,7 @@
         backend=PyPdfiumDocumentBackend,
         limits=limits,
     )
-    assert doc.valid == False
+    assert doc.valid is False
 def test_guess_format(tmp_path):
@@ -192,17 +187,17 @@
     )
     doc_path = temp_dir / "docling_test.xml"
     doc_path.write_text(xml_content, encoding="utf-8")
-    assert dci._guess_format(doc_path) == None
+    assert dci._guess_format(doc_path) is None
     buf = BytesIO(Path(doc_path).open("rb").read())
     stream = DocumentStream(name="docling_test.xml", stream=buf)
-    assert dci._guess_format(stream) == None
+    assert dci._guess_format(stream) is None
     # Invalid USPTO patent (as plain text)
     stream = DocumentStream(name="pftaps057006474.txt", stream=BytesIO(b"xyz"))
-    assert dci._guess_format(stream) == None
+    assert dci._guess_format(stream) is None
     doc_path = temp_dir / "pftaps_wrong.txt"
     doc_path.write_text("xyz", encoding="utf-8")
-    assert dci._guess_format(doc_path) == None
+    assert dci._guess_format(doc_path) is None
     # Valid Docling JSON
     test_str = '{"name": ""}'
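test_input_doc.py above replaces `== True` / `== False` / `== None` with identity checks (pycodestyle E712/E711). Equality dispatches to `__eq__`, which arbitrary objects may override, while `is` compares against the singleton directly. A short sketch (the variables are stand-ins):

doc_valid = True
fmt = None

# Flagged style: routed through __eq__, which custom types can intercept.
assert doc_valid == True   # noqa: E712
assert fmt == None         # noqa: E711

# Preferred style: identity check against the singleton.
assert doc_valid is True
assert fmt is None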
diff --git a/tests/test_interfaces.py b/tests/test_interfaces.py
index 29130c5..8d68f29 100644
--- a/tests/test_interfaces.py
+++ b/tests/test_interfaces.py
@@ -4,7 +4,6 @@
 from pathlib import Path
 import pytest
 from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import DocumentStream, InputFormat
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
@@ -16,14 +15,12 @@
 GENERATE = GEN_TEST_DATA
 def get_pdf_path():
-
     pdf_path = Path("./tests/data/pdf/2305.03393v1-pg9.pdf")
     return pdf_path
 @pytest.fixture
 def converter():
-
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = False
     pipeline_options.do_table_structure = True
@@ -42,7 +39,6 @@ def converter():
 def test_convert_path(converter: DocumentConverter):
-
     pdf_path = get_pdf_path()
     print(f"converting {pdf_path}")
@@ -56,7 +52,6 @@ def test_convert_path(converter: DocumentConverter):
 def test_convert_stream(converter: DocumentConverter):
-
     pdf_path = get_pdf_path()
     print(f"converting {pdf_path}")
diff --git a/tests/test_invalid_input.py b/tests/test_invalid_input.py
index 68716cb..3cc7a63 100644
--- a/tests/test_invalid_input.py
+++ b/tests/test_invalid_input.py
@@ -8,7 +8,6 @@
 from docling.document_converter import ConversionError, DocumentConverter
 def get_pdf_path():
-
     pdf_path = Path("./tests/data/pdf/2305.03393v1-pg9.pdf")
     return pdf_path
diff --git a/tests/test_legacy_format_transform.py b/tests/test_legacy_format_transform.py
index c46f899..caef8ff 100644
--- a/tests/test_legacy_format_transform.py
+++ b/tests/test_legacy_format_transform.py
@@ -3,8 +3,6 @@
 from pathlib import Path
 import pytest
-from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
-from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBackend
 from docling.datamodel.base_models import InputFormat
 from docling.datamodel.pipeline_options import PdfPipelineOptions
 from docling.document_converter import DocumentConverter, PdfFormatOption
@@ -23,7 +21,6 @@ def test_doc_paths():
 def get_converter():
-
     pipeline_options = PdfPipelineOptions()
     pipeline_options.do_ocr = False
diff --git a/tests/verify_utils.py b/tests/verify_utils.py
index 02861a8..1a913c2 100644
--- a/tests/verify_utils.py
+++ b/tests/verify_utils.py
@@ -21,7 +21,6 @@
 from docling.datamodel.document import ConversionResult
 def levenshtein(str1: str, str2: str) -> int:
-
     # Ensure str1 is the shorter string to optimize memory usage
     if len(str1) > len(str2):
         str1, str2 = str2, str1
@@ -46,7 +45,6 @@ def levenshtein(str1: str, str2: str) -> int:
 def verify_text(gt: str, pred: str, fuzzy: bool, fuzzy_threshold: float = 0.4):
-
     if len(gt) == 0 or not fuzzy:
         assert gt == pred, f"{gt}!={pred}"
     else:
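The `levenshtein` helper whose first lines appear above swaps its arguments so that `str1` is the shorter string; that pays off because the classic dynamic program only needs the previous row, giving O(min(len(a), len(b))) memory instead of a full matrix. A sketch of that two-row formulation — an assumption about the elided function body, not a copy of tests/verify_utils.py:

def levenshtein(str1: str, str2: str) -> int:
    # Ensure str1 is the shorter string so the rows we keep stay small.
    if len(str1) > len(str2):
        str1, str2 = str2, str1

    # previous_row[j] = edit distance between the empty prefix and str1[:j]
    previous_row = list(range(len(str1) + 1))
    for i, ch2 in enumerate(str2, start=1):
        current_row = [i]
        for j, ch1 in enumerate(str1, start=1):
            insert_cost = current_row[j - 1] + 1
            delete_cost = previous_row[j] + 1
            replace_cost = previous_row[j - 1] + (ch1 != ch2)
            current_row.append(min(insert_cost, delete_cost, replace_cost))
        previous_row = current_row
    return previous_row[-1]

assert levenshtein("kitten", "sitting") == 3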
@@ -57,22 +55,19 @@
 def verify_cells(doc_pred_pages: List[Page], doc_true_pages: List[Page]):
-
-    assert len(doc_pred_pages) == len(
-        doc_true_pages
-    ), "pred- and true-doc do not have the same number of pages"
+    assert len(doc_pred_pages) == len(doc_true_pages), (
+        "pred- and true-doc do not have the same number of pages"
+    )
     for pid, page_true_item in enumerate(doc_true_pages):
-
         num_true_cells = len(page_true_item.cells)
         num_pred_cells = len(doc_pred_pages[pid].cells)
-        assert (
-            num_true_cells == num_pred_cells
-        ), f"num_true_cells!=num_pred_cells {num_true_cells}!={num_pred_cells}"
+        assert num_true_cells == num_pred_cells, (
+            f"num_true_cells!=num_pred_cells {num_true_cells}!={num_pred_cells}"
+        )
         for cid, cell_true_item in enumerate(page_true_item.cells):
-
             cell_pred_item = doc_pred_pages[pid].cells[cid]
             true_text = cell_true_item.text
@@ -81,9 +76,9 @@ def verify_cells(doc_pred_pages: List[Page], doc_true_pages: List[Page]):
             true_bbox = cell_true_item.rect.to_bounding_box().as_tuple()
             pred_bbox = cell_pred_item.rect.to_bounding_box().as_tuple()
-            assert (
-                true_bbox == pred_bbox
-            ), f"bbox is not the same: {true_bbox} != {pred_bbox}"
+            assert true_bbox == pred_bbox, (
+                f"bbox is not the same: {true_bbox} != {pred_bbox}"
+            )
     return True
@@ -123,19 +118,19 @@ def verify_tables_v1(doc_pred: DsDocument, doc_true: DsDocument, fuzzy: bool):
     # print("Expected number of tables: {}, result: {}".format(len(doc_true.tables), len(doc_pred.tables)))
-    assert len(doc_true.tables) == len(
-        doc_pred.tables
-    ), "document has different count of tables than expected."
+    assert len(doc_true.tables) == len(doc_pred.tables), (
+        "document has different count of tables than expected."
+    )
-    for l, true_item in enumerate(doc_true.tables):
-        pred_item = doc_pred.tables[l]
+    for ix, true_item in enumerate(doc_true.tables):
+        pred_item = doc_pred.tables[ix]
-        assert (
-            true_item.num_rows == pred_item.num_rows
-        ), "table does not have the same #-rows"
-        assert (
-            true_item.num_cols == pred_item.num_cols
-        ), "table does not have the same #-cols"
+        assert true_item.num_rows == pred_item.num_rows, (
+            "table does not have the same #-rows"
+        )
+        assert true_item.num_cols == pred_item.num_cols, (
+            "table does not have the same #-cols"
+        )
         assert true_item.data is not None, "documents are expected to have table data"
         assert pred_item.data is not None, "documents are expected to have table data"
@@ -145,7 +140,6 @@ def verify_tables_v1(doc_pred: DsDocument, doc_true: DsDocument, fuzzy: bool):
         for i, row in enumerate(true_item.data):
             for j, col in enumerate(true_item.data[i]):
-
                 # print("true: ", true_item.data[i][j].text)
                 # print("pred: ", pred_item.data[i][j].text)
                 # print("")
                 verify_text(
                     true_item.data[i][j].text, pred_item.data[i][j].text, fuzzy=fuzzy
                 )
@@ -154,20 +148,20 @@ def verify_tables_v1(doc_pred: DsDocument, doc_true: DsDocument, fuzzy: bool):
-                assert (
-                    true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type
-                ), "table-cell does not have the same type"
+                assert true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type, (
+                    "table-cell does not have the same type"
+                )
     return True
 def verify_table_v2(true_item: TableItem, pred_item: TableItem, fuzzy: bool):
-    assert (
-        true_item.data.num_rows == pred_item.data.num_rows
-    ), "table does not have the same #-rows"
-    assert (
-        true_item.data.num_cols == pred_item.data.num_cols
-    ), "table does not have the same #-cols"
+    assert true_item.data.num_rows == pred_item.data.num_rows, (
+        "table does not have the same #-rows"
+    )
+    assert true_item.data.num_cols == pred_item.data.num_cols, (
+        "table does not have the same #-cols"
+    )
     assert true_item.data is not None, "documents are expected to have table data"
     assert pred_item.data is not None, "documents are expected to have table data"
@@ -177,7 +171,6 @@ def verify_table_v2(true_item: TableItem, pred_item: TableItem, fuzzy: bool):
     for i, row in enumerate(true_item.data.grid):
         for j, col in enumerate(true_item.data.grid[i]):
-
             # print("true: ", true_item.data[i][j].text)
             # print("pred: ", pred_item.data[i][j].text)
             # print("")
@@ -223,11 +216,11 @@ def verify_docitems(doc_pred: DoclingDocument, doc_true: DoclingDocument, fuzzy: bool):
-    assert len(doc_pred.texts) == len(doc_true.texts), f"Text lengths do not match."
+    assert len(doc_pred.texts) == len(doc_true.texts), "Text lengths do not match."
-    assert len(doc_true.tables) == len(
-        doc_pred.tables
-    ), "document has different count of tables than expected."
+    assert len(doc_true.tables) == len(doc_pred.tables), (
+        "document has different count of tables than expected."
+    )
     for (true_item, _true_level), (pred_item, _pred_level) in zip(
         doc_true.iterate_items(), doc_pred.iterate_items()
@@ -237,7 +230,7 @@ def verify_docitems(doc_pred: DoclingDocument, doc_true: DoclingDocument, fuzzy: bool):
         assert isinstance(pred_item, DocItem), "Test item is not a DocItem"
         # Validate type
-        assert true_item.label == pred_item.label, f"Object label does not match."
+        assert true_item.label == pred_item.label, "Object label does not match."
         # Validate provenance
         assert len(true_item.prov) == len(pred_item.prov), "Length of prov mismatch"
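Nearly every hunk in this patch is the same mechanical rewrite: Black wrapped the asserted condition in parentheses and split it across lines, while ruff format keeps the condition on the `assert` line and parenthesizes the long message instead. The shape of the change, on a self-contained example (the values are stand-ins for illustration):

true_label = pred_label = "text"  # stand-in values

# Before (Black): the condition is parenthesized and split.
assert (
    true_label == pred_label
), "Object label does not match."

# After (ruff format): the condition stays inline; the message is
# wrapped in parentheses so the line fits within the length limit.
assert true_label == pred_label, (
    "Object label does not match."
)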
@@ -261,25 +254,25 @@ def verify_docitems(doc_pred: DoclingDocument, doc_true: DoclingDocument, fuzzy: bool):
         # Validate table content
         if isinstance(true_item, TableItem):
-            assert isinstance(
-                pred_item, TableItem
-            ), "Test item is not a TableItem as the expected one"
-            assert verify_table_v2(
-                true_item, pred_item, fuzzy=fuzzy
-            ), "Tables not matching"
+            assert isinstance(pred_item, TableItem), (
+                "Test item is not a TableItem as the expected one"
+            )
+            assert verify_table_v2(true_item, pred_item, fuzzy=fuzzy), (
+                "Tables not matching"
+            )
         # Validate picture content
         if isinstance(true_item, PictureItem):
-            assert isinstance(
-                pred_item, PictureItem
-            ), "Test item is not a PictureItem as the expected one"
+            assert isinstance(pred_item, PictureItem), (
+                "Test item is not a PictureItem as the expected one"
+            )
             true_image = true_item.get_image(doc=doc_true)
             pred_image = true_item.get_image(doc=doc_pred)
             if true_image is not None:
-                assert verify_picture_image_v2(
-                    true_image, pred_image
-                ), "Picture image mismatch"
+                assert verify_picture_image_v2(true_image, pred_image), (
+                    "Picture image mismatch"
+                )
             # TODO: check picture annotations
@@ -298,14 +291,14 @@
     input_path: Path,
     doc_result: ConversionResult,
     generate: bool = False,
-    ocr_engine: str = None,
+    ocr_engine: Optional[str] = None,
     fuzzy: bool = False,
 ):
     PageList = TypeAdapter(List[Page])
-    assert (
-        doc_result.status == ConversionStatus.SUCCESS
-    ), f"Doc {input_path} did not convert successfully."
+    assert doc_result.status == ConversionStatus.SUCCESS, (
+        f"Doc {input_path} did not convert successfully."
+    )
     doc_pred_pages: List[Page] = doc_result.pages
     doc_pred: DsDocument = doc_result.legacy_document
@@ -344,52 +337,52 @@ def verify_conversion_result_v1(
         with open(dt_path, "w") as fw:
             fw.write(doc_pred_dt)
     else:  # default branch in test
-        with open(pages_path, "r") as fr:
+        with open(pages_path) as fr:
             doc_true_pages = PageList.validate_json(fr.read())
-        with open(json_path, "r") as fr:
+        with open(json_path) as fr:
             doc_true: DsDocument = DsDocument.model_validate_json(fr.read())
-        with open(md_path, "r") as fr:
+        with open(md_path) as fr:
             doc_true_md = fr.read()
-        with open(dt_path, "r") as fr:
+        with open(dt_path) as fr:
             doc_true_dt = fr.read()
         if not fuzzy:
-            assert verify_cells(
-                doc_pred_pages, doc_true_pages
-            ), f"Mismatch in PDF cell prediction for {input_path}"
+            assert verify_cells(doc_pred_pages, doc_true_pages), (
+                f"Mismatch in PDF cell prediction for {input_path}"
+            )
         # assert verify_output(
         #     doc_pred, doc_true
         # ), f"Mismatch in JSON prediction for {input_path}"
-        assert verify_tables_v1(
-            doc_pred, doc_true, fuzzy=fuzzy
-        ), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
+        assert verify_tables_v1(doc_pred, doc_true, fuzzy=fuzzy), (
+            f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
+        )
-        assert verify_md(
-            doc_pred_md, doc_true_md, fuzzy=fuzzy
-        ), f"Mismatch in Markdown prediction for {input_path}"
+        assert verify_md(doc_pred_md, doc_true_md, fuzzy=fuzzy), (
+            f"Mismatch in Markdown prediction for {input_path}"
+        )
-        assert verify_dt(
-            doc_pred_dt, doc_true_dt, fuzzy=fuzzy
-        ), f"Mismatch in DocTags prediction for {input_path}"
+        assert verify_dt(doc_pred_dt, doc_true_dt, fuzzy=fuzzy), (
+            f"Mismatch in DocTags prediction for {input_path}"
+        )
 def verify_conversion_result_v2(
     input_path: Path,
     doc_result: ConversionResult,
     generate: bool = False,
-    ocr_engine: str = None,
+    ocr_engine: Optional[str] = None,
     fuzzy: bool = False,
 ):
     PageList = TypeAdapter(List[Page])
-    assert (
-        doc_result.status == ConversionStatus.SUCCESS
-    ), f"Doc {input_path} did not convert successfully."
+    assert doc_result.status == ConversionStatus.SUCCESS, (
+        f"Doc {input_path} did not convert successfully."
+    )
     doc_pred_pages: List[Page] = doc_result.pages
     doc_pred: DoclingDocument = doc_result.document
@@ -426,42 +419,41 @@ def verify_conversion_result_v2(
         with open(dt_path, "w") as fw:
             fw.write(doc_pred_dt)
     else:  # default branch in test
-        with open(pages_path, "r") as fr:
+        with open(pages_path) as fr:
             doc_true_pages = PageList.validate_json(fr.read())
-        with open(json_path, "r") as fr:
+        with open(json_path) as fr:
             doc_true: DoclingDocument = DoclingDocument.model_validate_json(fr.read())
-        with open(md_path, "r") as fr:
+        with open(md_path) as fr:
             doc_true_md = fr.read()
-        with open(dt_path, "r") as fr:
+        with open(dt_path) as fr:
             doc_true_dt = fr.read()
         if not fuzzy:
-            assert verify_cells(
-                doc_pred_pages, doc_true_pages
-            ), f"Mismatch in PDF cell prediction for {input_path}"
+            assert verify_cells(doc_pred_pages, doc_true_pages), (
+                f"Mismatch in PDF cell prediction for {input_path}"
+            )
         # assert verify_output(
         #     doc_pred, doc_true
         # ), f"Mismatch in JSON prediction for {input_path}"
-        assert verify_docitems(
-            doc_pred, doc_true, fuzzy=fuzzy
-        ), f"verify_docling_document(doc_pred, doc_true) mismatch for {input_path}"
+        assert verify_docitems(doc_pred, doc_true, fuzzy=fuzzy), (
+            f"verify_docling_document(doc_pred, doc_true) mismatch for {input_path}"
+        )
-        assert verify_md(
-            doc_pred_md, doc_true_md, fuzzy=fuzzy
-        ), f"Mismatch in Markdown prediction for {input_path}"
+        assert verify_md(doc_pred_md, doc_true_md, fuzzy=fuzzy), (
+            f"Mismatch in Markdown prediction for {input_path}"
+        )
-        assert verify_dt(
-            doc_pred_dt, doc_true_dt, fuzzy=fuzzy
-        ), f"Mismatch in DocTags prediction for {input_path}"
+        assert verify_dt(doc_pred_dt, doc_true_dt, fuzzy=fuzzy), (
+            f"Mismatch in DocTags prediction for {input_path}"
+        )
 def verify_document(pred_doc: DoclingDocument, gtfile: str, generate: bool = False):
-
     if not os.path.exists(gtfile) or generate:
         with open(gtfile, "w") as fw:
             json.dump(pred_doc.export_to_dict(), fw, indent=2)
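Two smaller fixes recur in verify_utils.py above: a parameter defaulting to `None` must be annotated `Optional[str]` rather than `str` (implicit Optional is rejected by recent mypy), and the redundant `"r"` mode is dropped from `open()`, which reads text by default (Ruff rule UP015). Both together in a minimal, hypothetical helper (not part of the repo):

from pathlib import Path
from typing import Optional

def read_groundtruth(input_path: Path, ocr_engine: Optional[str] = None) -> str:
    # open() defaults to mode="r" (text, read-only), so no mode argument is needed.
    with open(input_path) as fr:
        content = fr.read()
    # `is not None` mirrors the identity-comparison style used throughout the patch.
    if ocr_engine is not None:
        content = f"[{ocr_engine}] {content}"
    return content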