feat: new artifacts path and CLI utility (#876)
* fix artifacts path Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add docling-models utility Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * missing formatting Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * rename utility to docling-tools Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * rename download methods and deprecation warnings Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * propagate artifacts path usage for ocr models Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * move function to utils Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * remove unused file Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * update docs Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * simplify downloading specific model(s) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> * minor refactor Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
@@ -55,12 +55,13 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
||||
Processes a batch of elements and adds classification annotations.
|
||||
"""
|
||||
|
||||
_model_repo_folder = "DocumentFigureClassifier"
|
||||
images_scale = 2
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
enabled: bool,
|
||||
artifacts_path: Optional[Union[Path, str]],
|
||||
artifacts_path: Optional[Path],
|
||||
options: DocumentPictureClassifierOptions,
|
||||
accelerator_options: AcceleratorOptions,
|
||||
):
|
||||
@@ -88,9 +89,9 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
||||
)
|
||||
|
||||
if artifacts_path is None:
|
||||
artifacts_path = self.download_models_hf()
|
||||
artifacts_path = self.download_models()
|
||||
else:
|
||||
artifacts_path = Path(artifacts_path)
|
||||
artifacts_path = artifacts_path / self._model_repo_folder
|
||||
|
||||
self.document_picture_classifier = DocumentFigureClassifierPredictor(
|
||||
artifacts_path=artifacts_path,
|
||||
@@ -99,13 +100,14 @@ class DocumentPictureClassifier(BaseEnrichmentModel):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def download_models_hf(
|
||||
local_dir: Optional[Path] = None, force: bool = False
|
||||
def download_models(
|
||||
local_dir: Optional[Path] = None, force: bool = False, progress: bool = False
|
||||
) -> Path:
|
||||
from huggingface_hub import snapshot_download
|
||||
from huggingface_hub.utils import disable_progress_bars
|
||||
|
||||
disable_progress_bars()
|
||||
if not progress:
|
||||
disable_progress_bars()
|
||||
download_path = snapshot_download(
|
||||
repo_id="ds4sd/DocumentFigureClassifier",
|
||||
force_download=force,
|
||||
|
||||
Reference in New Issue
Block a user