fix: Improve OCR results, tighten criteria before dropping bitmap areas (#719)

fix: Properly care for all bitmap elements in OCR

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-01-10 10:38:49 +01:00
committed by GitHub
parent 9a6b5c8c8d
commit 5a060f237d
5 changed files with 19 additions and 16 deletions

View File

@@ -132,7 +132,7 @@ class DoclingParsePageBackend(PdfPageBackend):
return cells
def get_bitmap_rects(self, scale: float = 1) -> Iterable[BoundingBox]:
-AREA_THRESHOLD = 32 * 32
+AREA_THRESHOLD = 0 # 32 * 32
for i in range(len(self._dpage["images"])):
bitmap = self._dpage["images"][i]