fix: Correct scaling of debug visualizations, tune OCR (#700)
* fix: Correct scaling of debug visualizations, tune OCR

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* chore: remove unused imports

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* chore: Update docling-core

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
ead396ab40
commit
5cb4cf6f19
@ -138,18 +138,31 @@ class BaseOcrModel(BasePageModel):
|
|||||||
|
|
||||||
def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
|
def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
|
||||||
image = copy.deepcopy(page.image)
|
image = copy.deepcopy(page.image)
|
||||||
|
scale_x = image.width / page.size.width
|
||||||
|
scale_y = image.height / page.size.height
|
||||||
|
|
||||||
draw = ImageDraw.Draw(image, "RGBA")
|
draw = ImageDraw.Draw(image, "RGBA")
|
||||||
|
|
||||||
# Draw OCR rectangles as yellow filled rect
|
# Draw OCR rectangles as yellow filled rect
|
||||||
for rect in ocr_rects:
|
for rect in ocr_rects:
|
||||||
x0, y0, x1, y1 = rect.as_tuple()
|
x0, y0, x1, y1 = rect.as_tuple()
|
||||||
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
x0 *= scale_x
|
||||||
|
x1 *= scale_x
|
||||||
|
|
||||||
shade_color = (255, 255, 0, 40) # transparent yellow
|
shade_color = (255, 255, 0, 40) # transparent yellow
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
|
draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)
|
||||||
|
|
||||||
# Draw OCR and programmatic cells
|
# Draw OCR and programmatic cells
|
||||||
for tc in page.cells:
|
for tc in page.cells:
|
||||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||||
color = "red"
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
x0 *= scale_x
|
||||||
|
x1 *= scale_x
|
||||||
|
|
||||||
|
color = "gray"
|
||||||
if isinstance(tc, OcrCell):
|
if isinstance(tc, OcrCell):
|
||||||
color = "magenta"
|
color = "magenta"
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
|
draw.rectangle([(x0, y0), (x1, y1)], outline=color)
|
||||||
|
@ -67,29 +67,9 @@ class LayoutModel(BasePageModel):
|
|||||||
- Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
|
- Right: Clusters including FORM, KEY_VALUE_REGION, and PICTURE.
|
||||||
Includes label names and confidence scores for each cluster.
|
Includes label names and confidence scores for each cluster.
|
||||||
"""
|
"""
|
||||||
label_to_color = {
|
scale_x = page.image.width / page.size.width
|
||||||
DocItemLabel.TEXT: (255, 255, 153), # Light Yellow
|
scale_y = page.image.height / page.size.height
|
||||||
DocItemLabel.CAPTION: (255, 204, 153), # Light Orange
|
|
||||||
DocItemLabel.LIST_ITEM: (153, 153, 255), # Light Purple
|
|
||||||
DocItemLabel.FORMULA: (192, 192, 192), # Gray
|
|
||||||
DocItemLabel.TABLE: (255, 204, 204), # Light Pink
|
|
||||||
DocItemLabel.PICTURE: (255, 204, 164), # Light Beige
|
|
||||||
DocItemLabel.SECTION_HEADER: (255, 153, 153), # Light Red
|
|
||||||
DocItemLabel.PAGE_HEADER: (204, 255, 204), # Light Green
|
|
||||||
DocItemLabel.PAGE_FOOTER: (
|
|
||||||
204,
|
|
||||||
255,
|
|
||||||
204,
|
|
||||||
), # Light Green (same as Page-Header)
|
|
||||||
DocItemLabel.TITLE: (255, 153, 153), # Light Red (same as Section-Header)
|
|
||||||
DocItemLabel.FOOTNOTE: (200, 200, 255), # Light Blue
|
|
||||||
DocItemLabel.DOCUMENT_INDEX: (220, 220, 220), # Light Gray
|
|
||||||
DocItemLabel.CODE: (125, 125, 125), # Gray
|
|
||||||
DocItemLabel.CHECKBOX_SELECTED: (255, 182, 193), # Pale Green
|
|
||||||
DocItemLabel.CHECKBOX_UNSELECTED: (255, 182, 193), # Light Pink
|
|
||||||
DocItemLabel.FORM: (200, 255, 255), # Light Cyan
|
|
||||||
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14), # Rusty orange
|
|
||||||
}
|
|
||||||
# Filter clusters for left and right images
|
# Filter clusters for left and right images
|
||||||
exclude_labels = {
|
exclude_labels = {
|
||||||
DocItemLabel.FORM,
|
DocItemLabel.FORM,
|
||||||
@ -118,6 +98,11 @@ class LayoutModel(BasePageModel):
|
|||||||
cell_color = (0, 0, 0, 40) # Transparent black for cells
|
cell_color = (0, 0, 0, 40) # Transparent black for cells
|
||||||
for tc in c.cells:
|
for tc in c.cells:
|
||||||
cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
|
cx0, cy0, cx1, cy1 = tc.bbox.as_tuple()
|
||||||
|
cx0 *= scale_x
|
||||||
|
cx1 *= scale_x
|
||||||
|
cy0 *= scale_x
|
||||||
|
cy1 *= scale_y
|
||||||
|
|
||||||
draw.rectangle(
|
draw.rectangle(
|
||||||
[(cx0, cy0), (cx1, cy1)],
|
[(cx0, cy0), (cx1, cy1)],
|
||||||
outline=None,
|
outline=None,
|
||||||
@ -125,8 +110,16 @@ class LayoutModel(BasePageModel):
|
|||||||
)
|
)
|
||||||
# Draw cluster rectangle
|
# Draw cluster rectangle
|
||||||
x0, y0, x1, y1 = c.bbox.as_tuple()
|
x0, y0, x1, y1 = c.bbox.as_tuple()
|
||||||
cluster_fill_color = (*list(label_to_color.get(c.label)), 70)
|
x0 *= scale_x
|
||||||
cluster_outline_color = (*list(label_to_color.get(c.label)), 255)
|
x1 *= scale_x
|
||||||
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
|
||||||
|
cluster_fill_color = (*list(DocItemLabel.get_color(c.label)), 70)
|
||||||
|
cluster_outline_color = (
|
||||||
|
*list(DocItemLabel.get_color(c.label)),
|
||||||
|
255,
|
||||||
|
)
|
||||||
draw.rectangle(
|
draw.rectangle(
|
||||||
[(x0, y0), (x1, y1)],
|
[(x0, y0), (x1, y1)],
|
||||||
outline=cluster_outline_color,
|
outline=cluster_outline_color,
|
||||||
|
@ -66,23 +66,43 @@ class TableStructureModel(BasePageModel):
|
|||||||
show: bool = False,
|
show: bool = False,
|
||||||
):
|
):
|
||||||
assert page._backend is not None
|
assert page._backend is not None
|
||||||
|
assert page.size is not None
|
||||||
|
|
||||||
image = (
|
image = (
|
||||||
page._backend.get_page_image()
|
page._backend.get_page_image()
|
||||||
) # make new image to avoid drawing on the saved ones
|
) # make new image to avoid drawing on the saved ones
|
||||||
|
|
||||||
|
scale_x = image.width / page.size.width
|
||||||
|
scale_y = image.height / page.size.height
|
||||||
|
|
||||||
draw = ImageDraw.Draw(image)
|
draw = ImageDraw.Draw(image)
|
||||||
|
|
||||||
for table_element in tbl_list:
|
for table_element in tbl_list:
|
||||||
x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
|
x0, y0, x1, y1 = table_element.cluster.bbox.as_tuple()
|
||||||
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
x0 *= scale_x
|
||||||
|
x1 *= scale_x
|
||||||
|
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
||||||
|
|
||||||
for cell in table_element.cluster.cells:
|
for cell in table_element.cluster.cells:
|
||||||
x0, y0, x1, y1 = cell.bbox.as_tuple()
|
x0, y0, x1, y1 = cell.bbox.as_tuple()
|
||||||
|
x0 *= scale_x
|
||||||
|
x1 *= scale_x
|
||||||
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
|
||||||
|
|
||||||
for tc in table_element.table_cells:
|
for tc in table_element.table_cells:
|
||||||
if tc.bbox is not None:
|
if tc.bbox is not None:
|
||||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||||
|
x0 *= scale_x
|
||||||
|
x1 *= scale_x
|
||||||
|
y0 *= scale_x
|
||||||
|
y1 *= scale_y
|
||||||
|
|
||||||
if tc.column_header:
|
if tc.column_header:
|
||||||
width = 3
|
width = 3
|
||||||
else:
|
else:
|
||||||
|
1525
poetry.lock
generated
1525
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@ -25,7 +25,7 @@ packages = [{include = "docling"}]
|
|||||||
# actual dependencies:
|
# actual dependencies:
|
||||||
######################
|
######################
|
||||||
python = "^3.9"
|
python = "^3.9"
|
||||||
docling-core = { version = "^2.12.1", extras = ["chunking"] }
|
docling-core = { version = "^2.13.1", extras = ["chunking"] }
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-ibm-models = "^3.1.0"
|
docling-ibm-models = "^3.1.0"
|
||||||
deepsearch-glm = "^1.0.0"
|
deepsearch-glm = "^1.0.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user