Docling/docling/utils/orientation.py
Clément Doumouro 45265bf8b1
feat(ocr): auto-detect rotated pages in Tesseract (#1167)
* fix(ocr): tesseract support mis-oriented documents

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): update missing test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): rotate image to the natural orientation before layout prediction

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): move bounding box rotation util to orientation.py

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): refactor rotation utilities

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* fix(ocr): avoid swallowing tesseract errors causing orientation detection failures

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): revert layout updates

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>

* chore(ocr): update e2e OCR test data

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrCliModel`

* chore(ocr): proceed to OCR without rotation when OSD fails in `TesseractOcrModel`

* chore(ocr): default `TesseractOcrCliModel._is_auto` to `False`

* fix(ocr): fix `TesseractOcrCliModel._is_auto` computation

* chore(ocr): improve logging in case of OSD failure in `TesseractOcrCliModel` and `TesseractOcrModel`

---------

Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
2025-05-21 18:12:33 +02:00

72 lines
2.0 KiB
Python

from typing import Tuple
from docling_core.types.doc import BoundingBox, CoordOrigin
from docling_core.types.doc.page import BoundingRectangle
# Angles that rotate_bounding_box handles once the requested angle has been
# reduced modulo 360; its error for any other angle lists these values.
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
def rotate_bounding_box(
    bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
) -> BoundingRectangle:
    """Return the four corners of ``bbox`` after a right-angle rotation.

    The box is first converted to TOPLEFT coordinates. Corner ``r_0`` is the
    corner that is bottom-left in the unrotated box; ``r_1``, ``r_2`` and
    ``r_3`` are the remaining corners taken counterclockwise around the box,
    so the four points always describe a closed rectangle.

    Args:
        bbox: Box to rotate. It is converted to a top-left origin using
            ``im_size[1]`` as the page height.
        angle: Rotation angle. It is reduced modulo 360 and must then be one
            of ``CLIPPED_ORIENTATIONS``.
        im_size: Size of the image the box is expressed in, as
            ``(width, height)``.

    Returns:
        A ``BoundingRectangle`` whose corners are in TOPLEFT coordinates.

    Raises:
        ValueError: If ``angle`` modulo 360 is not 0, 90, 180 or 270.
    """
    # NOTE(review): whether 90 means clockwise or counterclockwise is set by
    # the callers' convention, which is not visible here - confirm before
    # touching the 90/270 branches.
    bbox = bbox.to_top_left_origin(im_size[1])
    left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
    # im_size is (width, height): index 1 is the page height used above and
    # index 0 is the extent along which x offsets are mirrored below.
    im_w, im_h = im_size
    angle = angle % 360
    if angle == 0:
        r_x0 = left
        r_y0 = top + height
        r_x1 = r_x0 + width
        r_y1 = r_y0
        r_x2 = r_x0 + width
        r_y2 = r_y0 - height
        r_x3 = r_x0
        r_y3 = r_y0 - height
    elif angle == 90:
        r_x0 = im_h - (top + height)
        r_y0 = left
        r_x1 = r_x0
        r_y1 = r_y0 + width
        r_x2 = r_x0 + height
        r_y2 = r_y0 + width
        # Corner 3 closes the rectangle opposite corner 1; it must not
        # repeat corner 1's coordinates.
        r_x3 = r_x0 + height
        r_y3 = r_y0
    elif angle == 180:
        r_x0 = im_w - left
        r_y0 = im_h - (top + height)
        r_x1 = r_x0 - width
        r_y1 = r_y0
        r_x2 = r_x0 - width
        r_y2 = r_y0 + height
        r_x3 = r_x0
        r_y3 = r_y0 + height
    elif angle == 270:
        r_x0 = top + height
        r_y0 = im_w - left
        r_x1 = r_x0
        r_y1 = r_y0 - width
        r_x2 = r_x0 - height
        r_y2 = r_y0 - width
        r_x3 = r_x0 - height
        r_y3 = r_y0
    else:
        msg = (
            f"invalid orientation {angle}, expected values in:"
            f" {sorted(CLIPPED_ORIENTATIONS)}"
        )
        raise ValueError(msg)
    return BoundingRectangle(
        r_x0=r_x0,
        r_y0=r_y0,
        r_x1=r_x1,
        r_y1=r_y1,
        r_x2=r_x2,
        r_y2=r_y2,
        r_x3=r_x3,
        r_y3=r_y3,
        coord_origin=CoordOrigin.TOPLEFT,
    )