Initial commit
This commit is contained in:
0
docling/pipeline/__init__.py
Normal file
0
docling/pipeline/__init__.py
Normal file
18
docling/pipeline/base_model_pipeline.py
Normal file
18
docling/pipeline/base_model_pipeline.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from abc import abstractmethod
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from docling.datamodel.base_models import Page, PipelineOptions
|
||||
|
||||
|
||||
class BaseModelPipeline:
|
||||
def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
|
||||
self.model_pipe = []
|
||||
self.artifacts_path = artifacts_path
|
||||
self.pipeline_options = pipeline_options
|
||||
|
||||
def apply(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
||||
for model in self.model_pipe:
|
||||
page_batch = model(page_batch)
|
||||
|
||||
yield from page_batch
|
||||
40
docling/pipeline/standard_model_pipeline.py
Normal file
40
docling/pipeline/standard_model_pipeline.py
Normal file
@@ -0,0 +1,40 @@
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
from docling.datamodel.base_models import Page, PipelineOptions
|
||||
from docling.models.easyocr_model import EasyOcrModel
|
||||
from docling.models.layout_model import LayoutModel
|
||||
from docling.models.page_assemble_model import PageAssembleModel
|
||||
from docling.models.table_structure_model import TableStructureModel
|
||||
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
||||
|
||||
|
||||
class StandardModelPipeline(BaseModelPipeline):
|
||||
_layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
|
||||
_table_model_path = "model_artifacts/tableformer"
|
||||
|
||||
def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions):
|
||||
super().__init__(artifacts_path, pipeline_options)
|
||||
|
||||
self.model_pipe = [
|
||||
EasyOcrModel(
|
||||
config={
|
||||
"lang": ["fr", "de", "es", "en"],
|
||||
"enabled": pipeline_options.do_ocr,
|
||||
}
|
||||
),
|
||||
LayoutModel(
|
||||
config={
|
||||
"artifacts_path": artifacts_path
|
||||
/ StandardModelPipeline._layout_model_path
|
||||
}
|
||||
),
|
||||
TableStructureModel(
|
||||
config={
|
||||
"artifacts_path": artifacts_path
|
||||
/ StandardModelPipeline._table_model_path,
|
||||
"enabled": pipeline_options.do_table_structure,
|
||||
"do_cell_matching": False,
|
||||
}
|
||||
),
|
||||
]
|
||||
Reference in New Issue
Block a user