Docling/docling/models/picture_description_base_model.py
Michele Dolfi 6eaae3cba0
feat: add factory for ocr engines via plugins (#1010)
* add factory for ocr engines

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* apply pre-commit after rebase

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add picture description factory

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix enable option

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* switch to create methods

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* make `options` an explicit kwarg

Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>

* keep old lock of docling-core

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fix lock

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add allow_external_plugins option

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add factory return and ignore options type

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Panos Vagenas <pva@zurich.ibm.com>
Co-authored-by: Panos Vagenas <pva@zurich.ibm.com>
2025-03-18 13:58:05 +01:00

81 lines
2.3 KiB
Python

import logging
from abc import abstractmethod
from pathlib import Path
from typing import Any, Iterable, List, Optional, Type, Union
from docling_core.types.doc import (
DoclingDocument,
NodeItem,
PictureClassificationClass,
PictureItem,
)
from docling_core.types.doc.document import ( # TODO: move import to docling_core.types.doc
PictureDescriptionData,
)
from PIL import Image
from docling.datamodel.pipeline_options import (
AcceleratorOptions,
PictureDescriptionBaseOptions,
)
from docling.models.base_model import (
BaseItemAndImageEnrichmentModel,
BaseModelWithOptions,
ItemAndImageEnrichmentElement,
)
class PictureDescriptionBaseModel(
BaseItemAndImageEnrichmentModel, BaseModelWithOptions
):
images_scale: float = 2.0
def __init__(
self,
*,
enabled: bool,
enable_remote_services: bool,
artifacts_path: Optional[Union[Path, str]],
options: PictureDescriptionBaseOptions,
accelerator_options: AcceleratorOptions,
):
self.enabled = enabled
self.options = options
self.provenance = "not-implemented"
def is_processable(self, doc: DoclingDocument, element: NodeItem) -> bool:
return self.enabled and isinstance(element, PictureItem)
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:
raise NotImplementedError
def __call__(
self,
doc: DoclingDocument,
element_batch: Iterable[ItemAndImageEnrichmentElement],
) -> Iterable[NodeItem]:
if not self.enabled:
for element in element_batch:
yield element.item
return
images: List[Image.Image] = []
elements: List[PictureItem] = []
for el in element_batch:
assert isinstance(el.item, PictureItem)
elements.append(el.item)
images.append(el.image)
outputs = self._annotate_images(images)
for item, output in zip(elements, outputs):
item.annotations.append(
PictureDescriptionData(text=output, provenance=self.provenance)
)
yield item
@classmethod
@abstractmethod
def get_options_type(cls) -> Type[PictureDescriptionBaseOptions]:
pass