Initial commit
This commit is contained in:
40
docling/pipeline/standard_model_pipeline.py
Normal file
40
docling/pipeline/standard_model_pipeline.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from docling.datamodel.base_models import Page, PipelineOptions
|
||||
from docling.models.easyocr_model import EasyOcrModel
|
||||
from docling.models.layout_model import LayoutModel
|
||||
from docling.models.page_assemble_model import PageAssembleModel
|
||||
from docling.models.table_structure_model import TableStructureModel
|
||||
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
||||
|
||||
|
||||
class StandardModelPipeline(BaseModelPipeline):
|
||||
_layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
|
||||
_table_model_path = "model_artifacts/tableformer"
|
||||
|
||||
def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
|
||||
super().__init__(artifacts_path, pipeline_options)
|
||||
|
||||
self.model_pipe = [
|
||||
EasyOcrModel(
|
||||
config={
|
||||
"lang": ["fr", "de", "es", "en"],
|
||||
"enabled": pipeline_options.do_ocr,
|
||||
}
|
||||
),
|
||||
LayoutModel(
|
||||
config={
|
||||
"artifacts_path": artifacts_path
|
||||
/ StandardModelPipeline._layout_model_path
|
||||
}
|
||||
),
|
||||
TableStructureModel(
|
||||
config={
|
||||
"artifacts_path": artifacts_path
|
||||
/ StandardModelPipeline._table_model_path,
|
||||
"enabled": pipeline_options.do_table_structure,
|
||||
"do_cell_matching": False,
|
||||
}
|
||||
),
|
||||
]
|
||||
Reference in New Issue
Block a user