ci: add coverage and ruff (#1383)
* add coverage calculation and push Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * new codecov version and usage of token Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * enable ruff formatter instead of black and isort Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff lint fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * apply ruff unsafe fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add removed imports Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * runs 1 on linter issues Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * finalize linter fixes Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * Update pyproject.toml Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -10,7 +10,9 @@ from docling_core.types.doc import (
|
||||
TableCell,
|
||||
)
|
||||
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
||||
from docling_core.types.io import ( # DO ΝΟΤ REMOVE; explicitly exposed from this location
|
||||
|
||||
# DO NOT REMOVE; explicitly exposed from this location
|
||||
from docling_core.types.io import (
|
||||
DocumentStream,
|
||||
)
|
||||
from PIL.Image import Image
|
||||
@@ -233,9 +235,9 @@ class Page(BaseModel):
|
||||
None # Internal PDF backend. By default it is cleared during assembling.
|
||||
)
|
||||
_default_image_scale: float = 1.0 # Default image scale for external usage.
|
||||
_image_cache: Dict[float, Image] = (
|
||||
{}
|
||||
) # Cache of images in different scales. By default it is cleared during assembling.
|
||||
_image_cache: Dict[
|
||||
float, Image
|
||||
] = {} # Cache of images in different scales. By default it is cleared during assembling.
|
||||
|
||||
def get_image(
|
||||
self, scale: float = 1.0, cropbox: Optional[BoundingBox] = None
|
||||
@@ -243,7 +245,7 @@ class Page(BaseModel):
|
||||
if self._backend is None:
|
||||
return self._image_cache.get(scale, None)
|
||||
|
||||
if not scale in self._image_cache:
|
||||
if scale not in self._image_cache:
|
||||
if cropbox is None:
|
||||
self._image_cache[scale] = self._backend.get_page_image(scale=scale)
|
||||
else:
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
import csv
|
||||
import logging
|
||||
import re
|
||||
from collections.abc import Iterable
|
||||
from enum import Enum
|
||||
from io import BytesIO
|
||||
from pathlib import Path, PurePath
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Dict,
|
||||
Iterable,
|
||||
List,
|
||||
Literal,
|
||||
Optional,
|
||||
@@ -17,6 +17,8 @@ from typing import (
|
||||
)
|
||||
|
||||
import filetype
|
||||
|
||||
# DO NOT REMOVE; explicitly exposed from this location
|
||||
from docling_core.types.doc import (
|
||||
DocItem,
|
||||
DocItemLabel,
|
||||
@@ -35,14 +37,14 @@ from docling_core.types.legacy_doc.base import (
|
||||
PageReference,
|
||||
Prov,
|
||||
Ref,
|
||||
Table as DsSchemaTable,
|
||||
TableCell,
|
||||
)
|
||||
from docling_core.types.legacy_doc.base import Table as DsSchemaTable
|
||||
from docling_core.types.legacy_doc.base import TableCell
|
||||
from docling_core.types.legacy_doc.document import (
|
||||
CCSDocumentDescription as DsDocumentDescription,
|
||||
CCSFileInfoObject as DsFileInfoObject,
|
||||
ExportedCCSDocument as DsDocument,
|
||||
)
|
||||
from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject
|
||||
from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument
|
||||
from docling_core.utils.file import resolve_source_to_stream
|
||||
from docling_core.utils.legacy import docling_document_to_legacy
|
||||
from pydantic import BaseModel
|
||||
@@ -65,7 +67,7 @@ from docling.datamodel.base_models import (
|
||||
)
|
||||
from docling.datamodel.settings import DocumentLimits
|
||||
from docling.utils.profiling import ProfilingItem
|
||||
from docling.utils.utils import create_file_hash, create_hash
|
||||
from docling.utils.utils import create_file_hash
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from docling.document_converter import FormatOption
|
||||
@@ -134,9 +136,9 @@ class InputDocument(BaseModel):
|
||||
self._init_doc(backend, path_or_stream)
|
||||
|
||||
elif isinstance(path_or_stream, BytesIO):
|
||||
assert (
|
||||
filename is not None
|
||||
), "Can't construct InputDocument from stream without providing filename arg."
|
||||
assert filename is not None, (
|
||||
"Can't construct InputDocument from stream without providing filename arg."
|
||||
)
|
||||
self.file = PurePath(filename)
|
||||
self.filesize = path_or_stream.getbuffer().nbytes
|
||||
|
||||
@@ -228,7 +230,6 @@ class _DummyBackend(AbstractDocumentBackend):
|
||||
|
||||
|
||||
class _DocumentConversionInput(BaseModel):
|
||||
|
||||
path_or_stream_iterator: Iterable[Union[Path, str, DocumentStream]]
|
||||
headers: Optional[Dict[str, str]] = None
|
||||
limits: Optional[DocumentLimits] = DocumentLimits()
|
||||
|
||||
@@ -380,7 +380,6 @@ class PaginatedPipelineOptions(PipelineOptions):
|
||||
|
||||
|
||||
class VlmPipelineOptions(PaginatedPipelineOptions):
|
||||
|
||||
generate_page_images: bool = True
|
||||
force_backend_text: bool = (
|
||||
False # (To be used with vlms, or other generative models)
|
||||
|
||||
Reference in New Issue
Block a user