diff --git a/docling/cli/main.py b/docling/cli/main.py
index a2a86bf..8e0d23c 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -24,6 +24,7 @@ from docling.datamodel.base_models import (
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
+ OcrMacOptions,
OcrOptions,
PdfPipelineOptions,
TableFormerMode,
@@ -74,6 +75,7 @@ class OcrEngine(str, Enum):
EASYOCR = "easyocr"
TESSERACT_CLI = "tesseract_cli"
TESSERACT = "tesseract"
+ OCRMAC = "ocrmac"
def export_documents(
@@ -259,6 +261,8 @@ def convert(
ocr_options = TesseractCliOcrOptions(force_full_page_ocr=force_ocr)
case OcrEngine.TESSERACT:
ocr_options = TesseractOcrOptions(force_full_page_ocr=force_ocr)
+ case OcrEngine.OCRMAC:
+ ocr_options = OcrMacOptions(force_full_page_ocr=force_ocr)
case _:
raise RuntimeError(f"Unexpected OCR engine type {ocr_engine}")
diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py
index 1ea4d62..b691215 100644
--- a/docling/datamodel/pipeline_options.py
+++ b/docling/datamodel/pipeline_options.py
@@ -63,6 +63,17 @@ class TesseractOcrOptions(OcrOptions):
)
+class OcrMacOptions(OcrOptions):
+ """Options for the ocrmac OCR engine, identified by ``kind == "ocrmac"``."""
+
+ # Literal tag for this options type; `ocr_options` on the pipeline options
+ # is declared with `discriminator="kind"`.
+ kind: Literal["ocrmac"] = "ocrmac"
+ # Forwarded to ocrmac as `language_preference`.
+ lang: List[str] = ["fr-FR", "de-DE", "es-ES", "en-US"]
+ # Forwarded to ocrmac as `recognition_level`.
+ recognition: str = "accurate"
+ # Forwarded to ocrmac as `framework`.
+ framework: str = "vision"
+
+ # Unknown fields are rejected rather than silently accepted.
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+
+
class PipelineOptions(BaseModel):
create_legacy_output: bool = (
True # This defautl will be set to False on a future version of docling
@@ -75,9 +86,9 @@ class PdfPipelineOptions(PipelineOptions):
do_ocr: bool = True # True: perform OCR, replace programmatic PDF text
table_structure_options: TableStructureOptions = TableStructureOptions()
- ocr_options: Union[EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions] = (
- Field(EasyOcrOptions(), discriminator="kind")
- )
+ ocr_options: Union[
+ EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions, OcrMacOptions
+ ] = Field(EasyOcrOptions(), discriminator="kind")
images_scale: float = 1.0
generate_page_images: bool = False
diff --git a/docling/models/ocr_mac_model.py b/docling/models/ocr_mac_model.py
new file mode 100644
index 0000000..38bcf1c
--- /dev/null
+++ b/docling/models/ocr_mac_model.py
@@ -0,0 +1,118 @@
+import logging
+import tempfile
+from typing import Iterable, Optional, Tuple
+
+from docling_core.types.doc import BoundingBox, CoordOrigin
+
+from docling.datamodel.base_models import OcrCell, Page
+from docling.datamodel.document import ConversionResult
+from docling.datamodel.pipeline_options import OcrMacOptions
+from docling.datamodel.settings import settings
+from docling.models.base_ocr_model import BaseOcrModel
+from docling.utils.profiling import TimeRecorder
+
+_log = logging.getLogger(__name__)
+
+
+class OcrMacModel(BaseOcrModel):
+ def __init__(self, enabled: bool, options: OcrMacOptions):
+ super().__init__(enabled=enabled, options=options)
+ self.options: OcrMacOptions
+
+ self.scale = 3 # multiplier for 72 dpi == 216 dpi.
+
+ if self.enabled:
+ install_errmsg = (
+ "ocrmac is not correctly installed. "
+ "Please install it via `pip install ocrmac` to use this OCR engine. "
+ "Alternatively, Docling has support for other OCR engines. See the documentation: "
+ "https://ds4sd.github.io/docling/installation/"
+ )
+ try:
+ from ocrmac import ocrmac
+ except ImportError:
+ raise ImportError(install_errmsg)
+
+ self.reader_RIL = ocrmac.OCR
+
+ def __call__(
+ self, conv_res: ConversionResult, page_batch: Iterable[Page]
+ ) -> Iterable[Page]:
+
+ if not self.enabled:
+ yield from page_batch
+ return
+
+ for page in page_batch:
+ assert page._backend is not None
+ if not page._backend.is_valid():
+ yield page
+ else:
+ with TimeRecorder(conv_res, "ocr"):
+
+ ocr_rects = self.get_ocr_rects(page)
+
+ all_ocr_cells = []
+ for ocr_rect in ocr_rects:
+ # Skip zero area boxes
+ if ocr_rect.area() == 0:
+ continue
+ high_res_image = page._backend.get_page_image(
+ scale=self.scale, cropbox=ocr_rect
+ )
+
+ with tempfile.NamedTemporaryFile(
+ suffix=".png", mode="w"
+ ) as image_file:
+ fname = image_file.name
+ high_res_image.save(fname)
+
+ boxes = self.reader_RIL(
+ fname,
+ recognition_level=self.options.recognition,
+ framework=self.options.framework,
+ language_preference=self.options.lang,
+ ).recognize()
+
+ im_width, im_height = high_res_image.size
+ cells = []
+ for ix, (text, confidence, box) in enumerate(boxes):
+ x = float(box[0])
+ y = float(box[1])
+ w = float(box[2])
+ h = float(box[3])
+
+ x1 = x * im_width
+ y2 = (1 - y) * im_height
+
+ x2 = x1 + w * im_width
+ y1 = y2 - h * im_height
+
+ left = x1 / self.scale
+ top = y1 / self.scale
+ right = x2 / self.scale
+ bottom = y2 / self.scale
+
+ cells.append(
+ OcrCell(
+ id=ix,
+ text=text,
+ confidence=confidence,
+ bbox=BoundingBox.from_tuple(
+ coord=(left, top, right, bottom),
+ origin=CoordOrigin.TOPLEFT,
+ ),
+ )
+ )
+
+ # del high_res_image
+ all_ocr_cells.extend(cells)
+
+ # Post-process the cells
+ page.cells = self.post_process_cells(all_ocr_cells, page.cells)
+
+ # DEBUG code:
+ if settings.debug.visualize_ocr:
+ self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
+
+ yield page
diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py
index 65803d4..63a7a89 100644
--- a/docling/pipeline/standard_pdf_pipeline.py
+++ b/docling/pipeline/standard_pdf_pipeline.py
@@ -1,4 +1,5 @@
import logging
+import sys
from pathlib import Path
from typing import Optional
@@ -10,6 +11,7 @@ from docling.datamodel.base_models import AssembledUnit, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
+ OcrMacOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
@@ -18,6 +20,7 @@ from docling.models.base_ocr_model import BaseOcrModel
from docling.models.ds_glm_model import GlmModel, GlmOptions
from docling.models.easyocr_model import EasyOcrModel
from docling.models.layout_model import LayoutModel
+from docling.models.ocr_mac_model import OcrMacModel
from docling.models.page_assemble_model import PageAssembleModel, PageAssembleOptions
from docling.models.page_preprocessing_model import (
PagePreprocessingModel,
@@ -118,6 +121,15 @@ class StandardPdfPipeline(PaginatedPipeline):
enabled=self.pipeline_options.do_ocr,
options=self.pipeline_options.ocr_options,
)
+ elif isinstance(self.pipeline_options.ocr_options, OcrMacOptions):
+ if "darwin" != sys.platform:
+ raise RuntimeError(
+ f"The specified OCR type is only supported on Mac: {self.pipeline_options.ocr_options.kind}."
+ )
+ return OcrMacModel(
+ enabled=self.pipeline_options.do_ocr,
+ options=self.pipeline_options.ocr_options,
+ )
return None
def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
diff --git a/docs/examples/custom_convert.py b/docs/examples/custom_convert.py
index 7631848..2d30090 100644
--- a/docs/examples/custom_convert.py
+++ b/docs/examples/custom_convert.py
@@ -7,6 +7,7 @@ from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.models.ocr_mac_model import OcrMacOptions
from docling.models.tesseract_ocr_cli_model import TesseractCliOcrOptions
from docling.models.tesseract_ocr_model import TesseractOcrOptions
@@ -122,6 +123,20 @@ def main():
# }
# )
+ # Docling Parse with ocrmac (Mac only)
+ # ----------------------
+ # pipeline_options = PdfPipelineOptions()
+ # pipeline_options.do_ocr = True
+ # pipeline_options.do_table_structure = True
+ # pipeline_options.table_structure_options.do_cell_matching = True
+ # pipeline_options.ocr_options = OcrMacOptions()
+
+ # doc_converter = DocumentConverter(
+ # format_options={
+ # InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+ # }
+ # )
+
###########################################################################
start_time = time.time()
diff --git a/docs/examples/full_page_ocr.py b/docs/examples/full_page_ocr.py
index 35c2ba6..bbb7e12 100644
--- a/docs/examples/full_page_ocr.py
+++ b/docs/examples/full_page_ocr.py
@@ -4,6 +4,7 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
+ OcrMacOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
TesseractOcrOptions,
@@ -19,9 +20,10 @@ def main():
pipeline_options.do_table_structure = True
pipeline_options.table_structure_options.do_cell_matching = True
- # Any of the OCR options can be used:EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions
+ # Any of the OCR options can be used: EasyOcrOptions, TesseractOcrOptions, TesseractCliOcrOptions, OcrMacOptions (Mac only)
# ocr_options = EasyOcrOptions(force_full_page_ocr=True)
# ocr_options = TesseractOcrOptions(force_full_page_ocr=True)
+ # ocr_options = OcrMacOptions(force_full_page_ocr=True)
ocr_options = TesseractCliOcrOptions(force_full_page_ocr=True)
pipeline_options.ocr_options = ocr_options
diff --git a/docs/installation.md b/docs/installation.md
index df18dec..addae38 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -30,6 +30,7 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
| [EasyOCR](https://github.com/JaidedAI/EasyOCR) | Default in Docling or via `pip install easyocr`. | `EasyOcrOptions` |
| Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` |
| Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` |
+ | OcrMac | System dependency (macOS only). See description below. | `OcrMacOptions` |
The Docling `DocumentConverter` allows to choose the OCR engine with the `ocr_options` settings. For example
@@ -91,6 +92,17 @@ Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 archi
pip install --no-binary :all: tesserocr
```
+
ocrmac installation
+
+ [ocrmac](https://github.com/straussmaximilian/ocrmac) uses
+ Apple's Vision (or LiveText) framework as its OCR backend.
+ To use this engine with Docling, ocrmac must be installed on your system.
+ It only works on macOS 10.15 or newer.
+
+ ```console
+ pip install ocrmac
+ ```
+
## Development setup
To develop Docling features, bugfixes etc., install as follows from your local clone's root dir:
diff --git a/poetry.lock b/poetry.lock
index f0717c4..d8dd1c8 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -182,8 +182,8 @@ files = [
lazy-object-proxy = ">=1.4.0"
typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
wrapt = [
- {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
{version = ">=1.11,<2", markers = "python_version < \"3.11\""},
+ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
]
[[package]]
@@ -825,8 +825,8 @@ files = [
docling-core = ">=2.0,<3.0"
docutils = "!=0.21"
numpy = [
- {version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\" and python_version < \"3.13\""},
+ {version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
]
pandas = {version = ">=2.1.4,<3.0.0", markers = "python_version >= \"3.9\""}
python-dotenv = ">=1.0.0,<2.0.0"
@@ -912,8 +912,8 @@ huggingface_hub = ">=0.23,<1"
jsonlines = ">=3.1.0,<4.0.0"
mean_average_precision = ">=2021.4.26.0,<2022.0.0.0"
numpy = [
- {version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.24.4,<2.0.0", markers = "python_version < \"3.13\""},
+ {version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
]
opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
Pillow = ">=10.0.0,<11.0.0"
@@ -2063,8 +2063,8 @@ jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.1.112,<0.2.0"
packaging = ">=23.2,<25"
pydantic = [
- {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+ {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
PyYAML = ">=5.3"
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
@@ -2132,8 +2132,8 @@ files = [
httpx = ">=0.23.0,<1"
orjson = ">=3.9.14,<4.0.0"
pydantic = [
- {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+ {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
requests = ">=2,<3"
requests-toolbelt = ">=1.0.0,<2.0.0"
@@ -3548,6 +3548,22 @@ files = [
{file = "nvidia_nvtx_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:641dccaaa1139f3ffb0d3164b4b84f9d253397e38246a4f2f36728b48566d485"},
]
+[[package]]
+name = "ocrmac"
+version = "1.0.0"
+description = "A python wrapper to extract text from images on a mac system. Uses the vision framework from Apple."
+optional = true
+python-versions = ">=3.6"
+files = [
+ {file = "ocrmac-1.0.0-py2.py3-none-any.whl", hash = "sha256:0b5a072aa23a9ead48132cb2d595b680aa6c3c5a6cb69525155e35ca95610c3a"},
+ {file = "ocrmac-1.0.0.tar.gz", hash = "sha256:5b299e9030c973d1f60f82db000d6c2e5ff271601878c7db0885e850597d1d2e"},
+]
+
+[package.dependencies]
+Click = ">=7.0"
+pillow = "*"
+pyobjc-framework-Vision = "*"
+
[[package]]
name = "opencv-python-headless"
version = "4.10.0.84"
@@ -3566,10 +3582,10 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
[[package]]
@@ -3732,9 +3748,9 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -4331,8 +4347,8 @@ files = [
annotated-types = ">=0.6.0"
pydantic-core = "2.23.4"
typing-extensions = [
- {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
+ {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
]
[package.extras]
@@ -4500,8 +4516,8 @@ files = [
astroid = ">=2.15.8,<=2.17.0-dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
- {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.2", markers = "python_version < \"3.11\""},
+ {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
]
isort = ">=4.2.5,<6"
mccabe = ">=0.6,<0.8"
@@ -4556,6 +4572,102 @@ bulk-writer = ["azure-storage-blob", "minio (>=7.0.0)", "pyarrow (>=12.0.0)", "r
dev = ["black", "grpcio (==1.62.2)", "grpcio-testing (==1.62.2)", "grpcio-tools (==1.62.2)", "pytest (>=5.3.4)", "pytest-cov (>=2.8.1)", "pytest-timeout (>=1.3.4)", "ruff (>0.4.0)"]
model = ["milvus-model (>=0.1.0)"]
+[[package]]
+name = "pyobjc-core"
+version = "10.3.1"
+description = "Python<->ObjC Interoperability Module"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "pyobjc_core-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ea46d2cda17921e417085ac6286d43ae448113158afcf39e0abe484c58fb3d78"},
+ {file = "pyobjc_core-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:899d3c84d2933d292c808f385dc881a140cf08632907845043a333a9d7c899f9"},
+ {file = "pyobjc_core-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:6ff5823d13d0a534cdc17fa4ad47cf5bee4846ce0fd27fc40012e12b46db571b"},
+ {file = "pyobjc_core-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:2581e8e68885bcb0e11ec619e81ef28e08ee3fac4de20d8cc83bc5af5bcf4a90"},
+ {file = "pyobjc_core-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ea98d4c2ec39ca29e62e0327db21418696161fb138ee6278daf2acbedf7ce504"},
+ {file = "pyobjc_core-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:4c179c26ee2123d0aabffb9dbc60324b62b6f8614fb2c2328b09386ef59ef6d8"},
+ {file = "pyobjc_core-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cb901fce65c9be420c40d8a6ee6fff5ff27c6945f44fd7191989b982baa66dea"},
+ {file = "pyobjc_core-10.3.1.tar.gz", hash = "sha256:b204a80ccc070f9ab3f8af423a3a25a6fd787e228508d00c4c30f8ac538ba720"},
+]
+
+[[package]]
+name = "pyobjc-framework-cocoa"
+version = "10.3.1"
+description = "Wrappers for the Cocoa frameworks on macOS"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4cb4f8491ab4d9b59f5187e42383f819f7a46306a4fa25b84f126776305291d1"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5f31021f4f8fdf873b57a97ee1f3c1620dbe285e0b4eaed73dd0005eb72fd773"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:11b4e0bad4bbb44a4edda128612f03cdeab38644bbf174de0c13129715497296"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:de5e62e5ccf2871a94acf3bf79646b20ea893cc9db78afa8d1fe1b0d0f7cbdb0"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c5af24610ab639bd1f521ce4500484b40787f898f691b7a23da3339e6bc8b90"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:a7151186bb7805deea434fae9a4423335e6371d105f29e73cc2036c6779a9dbc"},
+ {file = "pyobjc_framework_Cocoa-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:743d2a1ac08027fd09eab65814c79002a1d0421d7c0074ffd1217b6560889744"},
+ {file = "pyobjc_framework_cocoa-10.3.1.tar.gz", hash = "sha256:1cf20714daaa986b488fb62d69713049f635c9d41a60c8da97d835710445281a"},
+]
+
+[package.dependencies]
+pyobjc-core = ">=10.3.1"
+
+[[package]]
+name = "pyobjc-framework-coreml"
+version = "10.3.1"
+description = "Wrappers for the framework CoreML on macOS"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_13_universal2.whl", hash = "sha256:c1fdcc0487807afa9cd0f88f25697e0e2e093d0219e8e1aa42aa3674dd78c2cb"},
+ {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:21c87e84c807b5dbe61e0f016d9aefa32d3212f175cc4b976b5c08770be7a58c"},
+ {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:a0877aed5d4cdbb63d1246cd5384c09d78a0667e83c435a1257d10017c11c1a4"},
+ {file = "pyobjc_framework_CoreML-10.3.1-cp36-abi3-macosx_11_0_universal2.whl", hash = "sha256:4bd3f1acfb3245727727b71cbcf7d21a33d7e00fa488e41ad01527764b969b92"},
+ {file = "pyobjc_framework_coreml-10.3.1.tar.gz", hash = "sha256:6b7091142cfaafee76f1a804329e7a4e3aeca921eea8644e9ceba4cc2751f705"},
+]
+
+[package.dependencies]
+pyobjc-core = ">=10.3.1"
+pyobjc-framework-Cocoa = ">=10.3.1"
+
+[[package]]
+name = "pyobjc-framework-quartz"
+version = "10.3.1"
+description = "Wrappers for the Quartz frameworks on macOS"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "pyobjc_framework_Quartz-10.3.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5ef4fd315ed2bc42ef77fdeb2bae28a88ec986bd7b8079a87ba3b3475348f96e"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:96578d4a3e70164efe44ad7dc320ecd4e211758ffcde5dcd694de1bbdfe090a4"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:ca35f92486869a41847a1703bb176aab8a53dbfd8e678d1f4d68d8e6e1581c71"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:00a0933267e3a46ea4afcc35d117b2efb920f06de797fa66279c52e7057e3590"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a161bedb4c5257a02ad56a910cd7eefb28bdb0ea78607df0d70ed4efe4ea54c1"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:d7a8028e117a94923a511944bfa9daf9744e212f06cf89010c60934a479863a5"},
+ {file = "pyobjc_framework_Quartz-10.3.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:de00c983b3267eb26fa42c6ed9f15e2bf006bde8afa7fe2b390646aa21a5d6fc"},
+ {file = "pyobjc_framework_quartz-10.3.1.tar.gz", hash = "sha256:b6d7e346d735c9a7f147cd78e6da79eeae416a0b7d3874644c83a23786c6f886"},
+]
+
+[package.dependencies]
+pyobjc-core = ">=10.3.1"
+pyobjc-framework-Cocoa = ">=10.3.1"
+
+[[package]]
+name = "pyobjc-framework-vision"
+version = "10.3.1"
+description = "Wrappers for the framework Vision on macOS"
+optional = true
+python-versions = ">=3.8"
+files = [
+ {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_13_universal2.whl", hash = "sha256:dff3582678930461a0bb11bf070854d49f6944a851dc89edc63fac93c75ddf39"},
+ {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_9_universal2.whl", hash = "sha256:32626183c51674efb3b5738e2884c3fea37edca010117cf71bd72cb3c49c869a"},
+ {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:2473b346a112c51ac485184305bd13c402e0db45f2df3d277315bd49efba18e9"},
+ {file = "pyobjc_framework_Vision-10.3.1-cp36-abi3-macosx_11_0_universal2.whl", hash = "sha256:4302e2c5f68c9667ecd4273809cbc4611af6368b123d69596e5b088f1b1aa16b"},
+ {file = "pyobjc_framework_vision-10.3.1.tar.gz", hash = "sha256:aa071656d395afc2d624600a9f30d6a3344aa747bf37f613ff3972158c40881c"},
+]
+
+[package.dependencies]
+pyobjc-core = ">=10.3.1"
+pyobjc-framework-Cocoa = ">=10.3.1"
+pyobjc-framework-CoreML = ">=10.3.1"
+pyobjc-framework-Quartz = ">=10.3.1"
+
[[package]]
name = "pypdfium2"
version = "4.30.0"
@@ -7248,9 +7360,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
type = ["pytest-mypy"]
[extras]
+ocrmac = ["ocrmac"]
tesserocr = ["tesserocr"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "a0f599090cfd9414c0e90fd611fd0b23166a45cd925904491eb0503a6f6bd1d8"
+content-hash = "129137f8229158ac7672919df1684a260f74db22517d4d40c905f801f2950f46"
diff --git a/pyproject.toml b/pyproject.toml
index abca531..52c6803 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -48,6 +48,7 @@ beautifulsoup4 = "^4.12.3"
pandas = "^2.1.4"
marko = "^2.1.2"
openpyxl = "^3.1.5"
+ocrmac = { version = "^1.0.0", markers = "sys_platform == 'darwin'", optional = true }
[tool.poetry.group.dev.dependencies]
black = {extras = ["jupyter"], version = "^24.4.2"}
@@ -95,6 +96,7 @@ torchvision = [
[tool.poetry.extras]
tesserocr = ["tesserocr"]
+ocrmac = ["ocrmac"]
[tool.poetry.scripts]
docling = "docling.cli.main:app"
@@ -130,6 +132,7 @@ module = [
"tesserocr.*",
"docling_ibm_models.*",
"easyocr.*",
+ "ocrmac.*",
"deepsearch_glm.*",
"lxml.*",
"bs4.*",
diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py
index 324a4a1..68dac33 100644
--- a/tests/test_e2e_ocr_conversion.py
+++ b/tests/test_e2e_ocr_conversion.py
@@ -1,3 +1,4 @@
+import sys
from pathlib import Path
from typing import List
@@ -6,6 +7,7 @@ from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import (
EasyOcrOptions,
+ OcrMacOptions,
OcrOptions,
PdfPipelineOptions,
TesseractCliOcrOptions,
@@ -59,6 +61,11 @@ def test_e2e_conversions():
TesseractCliOcrOptions(force_full_page_ocr=True),
]
+ # only works on mac
+ if "darwin" == sys.platform:
+ engines.append(OcrMacOptions())
+ engines.append(OcrMacOptions(force_full_page_ocr=True))
+
for ocr_options in engines:
print(f"Converting with ocr_engine: {ocr_options.kind}")
converter = get_converter(ocr_options=ocr_options)