diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 9791a25..8e99cd0 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -213,8 +213,8 @@ class PictureDescriptionBaseOptions(BaseOptions): batch_size: int = 8 scale: float = 2 - bitmap_area_threshold: float = ( - 0.2 # percentage of the area for a bitmap to processed with the models + picture_area_threshold: float = ( + 0.05 # percentage of the area for a picture to be processed with the models ) diff --git a/docling/models/picture_description_base_model.py b/docling/models/picture_description_base_model.py index 129387b..9616922 100644 --- a/docling/models/picture_description_base_model.py +++ b/docling/models/picture_description_base_model.py @@ -63,8 +63,20 @@ class PictureDescriptionBaseModel( elements: List[PictureItem] = [] for el in element_batch: assert isinstance(el.item, PictureItem) - elements.append(el.item) - images.append(el.image) + describe_image = True + # Don't describe the image if it's smaller than the threshold + if len(el.item.prov) > 0: + prov = el.item.prov[0] # PictureItems have at most a single provenance + page = doc.pages.get(prov.page_no) + if page is not None: + page_area = page.size.width * page.size.height + if page_area > 0: + area_fraction = prov.bbox.area() / page_area + if area_fraction < self.options.picture_area_threshold: + describe_image = False + if describe_image: + elements.append(el.item) + images.append(el.image) outputs = self._annotate_images(images)