
Signed-off-by: Christoph Auer <cau@zurich.ibm.com> Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com> Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Christoph Auer <cau@zurich.ibm.com> Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
41 lines
1.5 KiB
Python
41 lines
1.5 KiB
Python
from pathlib import Path
|
|
from typing import Iterable
|
|
|
|
from docling.datamodel.base_models import Page, PipelineOptions
|
|
from docling.models.easyocr_model import EasyOcrModel
|
|
from docling.models.layout_model import LayoutModel
|
|
from docling.models.page_assemble_model import PageAssembleModel
|
|
from docling.models.table_structure_model import TableStructureModel
|
|
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
|
|
|
|
|
class StandardModelPipeline(BaseModelPipeline):
    """Pipeline assembled from an OCR, a layout, and a table-structure model.

    The three model objects are created in ``__init__`` and stored, in that
    order, in ``self.model_pipe``.
    """

    # Locations of the model artifacts, joined onto ``artifacts_path`` in
    # ``__init__``.
    _layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
    _table_model_path = "model_artifacts/tableformer"

    def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
        """Create the model stages and store them in ``self.model_pipe``.

        Args:
            artifacts_path: Base directory; the layout and table model
                sub-paths defined on this class are appended to it.
            pipeline_options: Options object. ``do_ocr``,
                ``do_table_structure`` and
                ``table_structure_options.do_cell_matching`` are read from
                it and forwarded to the models' ``config`` dicts.
        """
        super().__init__(artifacts_path, pipeline_options)

        # Each config is assembled right before its model is instantiated so
        # that option reads and constructor calls stay interleaved per stage.
        ocr_config = {
            "lang": ["fr", "de", "es", "en"],
            "enabled": pipeline_options.do_ocr,
        }
        ocr_stage = EasyOcrModel(config=ocr_config)

        layout_config = {
            "artifacts_path": artifacts_path
            / StandardModelPipeline._layout_model_path
        }
        layout_stage = LayoutModel(config=layout_config)

        table_options = pipeline_options
        table_config = {
            "artifacts_path": artifacts_path
            / StandardModelPipeline._table_model_path,
            "enabled": table_options.do_table_structure,
            "do_cell_matching": table_options.table_structure_options.do_cell_matching,
        }
        table_stage = TableStructureModel(config=table_config)

        self.model_pipe = [ocr_stage, layout_stage, table_stage]
|