diff --git a/docling/cli/main.py b/docling/cli/main.py index e2bc0dd..6686da9 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -234,6 +234,12 @@ def convert( Optional[Path], typer.Option(..., help="If provided, the location of the model artifacts."), ] = None, + enable_remote_services: Annotated[ + bool, + typer.Option( + ..., help="Must be enabled when using models connecting to remote services." + ), + ] = False, abort_on_error: Annotated[ bool, typer.Option( @@ -380,6 +386,7 @@ def convert( accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device) pipeline_options = PdfPipelineOptions( + enable_remote_services=enable_remote_services, accelerator_options=accelerator_options, do_ocr=ocr, ocr_options=ocr_options, diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 3b6401b..16fb145 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -257,6 +257,7 @@ class PipelineOptions(BaseModel): ) document_timeout: Optional[float] = None accelerator_options: AcceleratorOptions = AcceleratorOptions() + enable_remote_services: bool = False class PdfPipelineOptions(PipelineOptions): diff --git a/docling/exceptions.py b/docling/exceptions.py index 13145b9..30c3206 100644 --- a/docling/exceptions.py +++ b/docling/exceptions.py @@ -4,3 +4,7 @@ class BaseError(RuntimeError): class ConversionError(BaseError): pass + + +class OperationNotAllowed(BaseError): + pass diff --git a/docling/models/picture_description_api_model.py b/docling/models/picture_description_api_model.py index 86b7694..c64f1bf 100644 --- a/docling/models/picture_description_api_model.py +++ b/docling/models/picture_description_api_model.py @@ -8,6 +8,7 @@ from PIL import Image from pydantic import BaseModel, ConfigDict from docling.datamodel.pipeline_options import PictureDescriptionApiOptions +from docling.exceptions import OperationNotAllowed from docling.models.picture_description_base_model 
import PictureDescriptionBaseModel _log = logging.getLogger(__name__) @@ -45,14 +46,20 @@ class ApiResponse(BaseModel): class PictureDescriptionApiModel(PictureDescriptionBaseModel): # elements_batch_size = 4 - def __init__(self, enabled: bool, options: PictureDescriptionApiOptions): + def __init__( + self, + enabled: bool, + enable_remote_services: bool, + options: PictureDescriptionApiOptions, + ): super().__init__(enabled=enabled, options=options) self.options: PictureDescriptionApiOptions if self.enabled: - if options.url.host != "localhost": - raise NotImplementedError( - "The options try to connect to remote APIs which are not yet allowed." + if not enable_remote_services: + raise OperationNotAllowed( + "Connections to remote services is only allowed when set explicitly. " + "pipeline_options.enable_remote_services=True." ) def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]: diff --git a/docling/pipeline/standard_pdf_pipeline.py b/docling/pipeline/standard_pdf_pipeline.py index 1c71bf7..ae4ed47 100644 --- a/docling/pipeline/standard_pdf_pipeline.py +++ b/docling/pipeline/standard_pdf_pipeline.py @@ -209,6 +209,7 @@ class StandardPdfPipeline(PaginatedPipeline): ): return PictureDescriptionApiModel( enabled=self.pipeline_options.do_picture_description, + enable_remote_services=self.pipeline_options.enable_remote_services, options=self.pipeline_options.picture_description_options, ) elif isinstance( diff --git a/docs/examples/pictures_description_api.py b/docs/examples/pictures_description_api.py index 3da37ed..05689c5 100644 --- a/docs/examples/pictures_description_api.py +++ b/docs/examples/pictures_description_api.py @@ -20,7 +20,9 @@ def main(): # For example, you can launch it locally with: # $ vllm serve "HuggingFaceTB/SmolVLM-256M-Instruct" - pipeline_options = PdfPipelineOptions() + pipeline_options = PdfPipelineOptions( + enable_remote_services=True # <-- this is required! 
+ ) pipeline_options.do_picture_description = True pipeline_options.picture_description_options = PictureDescriptionApiOptions( url="http://localhost:8000/v1/chat/completions", diff --git a/docs/usage.md b/docs/usage.md index a42bdea..4b5e4ba 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -71,6 +71,37 @@ Or using the CLI: docling --artifacts-path="/local/path/to/models" FILE ``` +#### Using remote services + +The main purpose of Docling is to run local models that do not share any user data with remote services. +However, there are valid use cases for processing part of the pipeline using remote services, for example invoking OCR engines from cloud vendors or using hosted LLMs. + +In Docling we decided to allow such models, but we require the user to explicitly opt in to communicating with external services. + +```py +from docling.datamodel.base_models import InputFormat +from docling.datamodel.pipeline_options import PdfPipelineOptions +from docling.document_converter import DocumentConverter, PdfFormatOption + +pipeline_options = PdfPipelineOptions(enable_remote_services=True) +doc_converter = DocumentConverter( + format_options={ + InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + } +) +``` + +When `enable_remote_services=True` is not set and a model that connects to a remote service is enabled, the system will raise an `OperationNotAllowed` exception. + +_Note: This option only concerns the system sending user data to remote services. Control of pulling data (e.g. model weights) follows the logic described in [Model prefetching and offline usage](#model-prefetching-and-offline-usage)._ + +##### List of remote model services + +The options in this list require explicitly setting `enable_remote_services=True` when processing the documents. + +- `PictureDescriptionApiOptions`: Using vision models via API calls. + + #### Adjust pipeline features The example file [custom_convert.py](./examples/custom_convert.py) contains multiple ways