feat: Introduce the enable_remote_services option to allow remote connections while processing (#941)

* feat: Introduce the allow_remote_services option to allow remote connections while processing

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add option in the example

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enhance docs

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rename to enable_remote_services

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-12 15:18:01 +01:00 committed by GitHub
parent 5101e2519e
commit 2716c7d4ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 58 additions and 5 deletions

View File

@ -234,6 +234,12 @@ def convert(
Optional[Path], Optional[Path],
typer.Option(..., help="If provided, the location of the model artifacts."), typer.Option(..., help="If provided, the location of the model artifacts."),
] = None, ] = None,
enable_remote_services: Annotated[
bool,
typer.Option(
..., help="Must be enabled when using models connecting to remote services."
),
] = False,
abort_on_error: Annotated[ abort_on_error: Annotated[
bool, bool,
typer.Option( typer.Option(
@ -380,6 +386,7 @@ def convert(
accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device) accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
pipeline_options = PdfPipelineOptions( pipeline_options = PdfPipelineOptions(
enable_remote_services=enable_remote_services,
accelerator_options=accelerator_options, accelerator_options=accelerator_options,
do_ocr=ocr, do_ocr=ocr,
ocr_options=ocr_options, ocr_options=ocr_options,

View File

@ -257,6 +257,7 @@ class PipelineOptions(BaseModel):
) )
document_timeout: Optional[float] = None document_timeout: Optional[float] = None
accelerator_options: AcceleratorOptions = AcceleratorOptions() accelerator_options: AcceleratorOptions = AcceleratorOptions()
enable_remote_services: bool = False
class PdfPipelineOptions(PipelineOptions): class PdfPipelineOptions(PipelineOptions):

View File

@ -4,3 +4,7 @@ class BaseError(RuntimeError):
class ConversionError(BaseError): class ConversionError(BaseError):
pass pass
class OperationNotAllowed(BaseError):
    """Raised when an operation is attempted without having been explicitly allowed."""

View File

@ -8,6 +8,7 @@ from PIL import Image
from pydantic import BaseModel, ConfigDict from pydantic import BaseModel, ConfigDict
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
from docling.exceptions import OperationNotAllowed
from docling.models.picture_description_base_model import PictureDescriptionBaseModel from docling.models.picture_description_base_model import PictureDescriptionBaseModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -45,14 +46,20 @@ class ApiResponse(BaseModel):
class PictureDescriptionApiModel(PictureDescriptionBaseModel): class PictureDescriptionApiModel(PictureDescriptionBaseModel):
# elements_batch_size = 4 # elements_batch_size = 4
def __init__(self, enabled: bool, options: PictureDescriptionApiOptions): def __init__(
self,
enabled: bool,
enable_remote_services: bool,
options: PictureDescriptionApiOptions,
):
super().__init__(enabled=enabled, options=options) super().__init__(enabled=enabled, options=options)
self.options: PictureDescriptionApiOptions self.options: PictureDescriptionApiOptions
if self.enabled: if self.enabled:
if options.url.host != "localhost": if not enable_remote_services:
raise NotImplementedError( raise OperationNotAllowed(
"The options try to connect to remote APIs which are not yet allowed." "Connections to remote services is only allowed when set explicitly. "
"pipeline_options.enable_remote_services=True."
) )
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]: def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:

View File

@ -209,6 +209,7 @@ class StandardPdfPipeline(PaginatedPipeline):
): ):
return PictureDescriptionApiModel( return PictureDescriptionApiModel(
enabled=self.pipeline_options.do_picture_description, enabled=self.pipeline_options.do_picture_description,
enable_remote_services=self.pipeline_options.enable_remote_services,
options=self.pipeline_options.picture_description_options, options=self.pipeline_options.picture_description_options,
) )
elif isinstance( elif isinstance(

View File

@ -20,7 +20,9 @@ def main():
# For example, you can launch it locally with: # For example, you can launch it locally with:
# $ vllm serve "HuggingFaceTB/SmolVLM-256M-Instruct" # $ vllm serve "HuggingFaceTB/SmolVLM-256M-Instruct"
pipeline_options = PdfPipelineOptions() pipeline_options = PdfPipelineOptions(
enable_remote_services=True # <-- this is required!
)
pipeline_options.do_picture_description = True pipeline_options.do_picture_description = True
pipeline_options.picture_description_options = PictureDescriptionApiOptions( pipeline_options.picture_description_options = PictureDescriptionApiOptions(
url="http://localhost:8000/v1/chat/completions", url="http://localhost:8000/v1/chat/completions",

View File

@ -71,6 +71,37 @@ Or using the CLI:
docling --artifacts-path="/local/path/to/models" FILE docling --artifacts-path="/local/path/to/models" FILE
``` ```
#### Using remote services
The main purpose of Docling is to run local models that do not share any user data with remote services.
However, there are valid use cases for processing part of the pipeline using remote services, for example invoking OCR engines from cloud vendors or using hosted LLMs.
In Docling we decided to allow such models, but we require the user to explicitly opt in to communicating with external services.
```py
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
pipeline_options = PdfPipelineOptions(enable_remote_services=True)
doc_converter = DocumentConverter(
format_options={
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
}
)
```
If a model that connects to remote services is used without setting `enable_remote_services=True`, the system raises an `OperationNotAllowed` exception.
_Note: This option is only related to the system sending user data to remote services. Control of pulling data (e.g. model weights) follows the logic described in [Model prefetching and offline usage](#model-prefetching-and-offline-usage)._
##### List of remote model services
The options in this list require explicitly setting `enable_remote_services=True` when processing documents.
- `PictureDescriptionApiOptions`: Using vision models via API calls.
#### Adjust pipeline features #### Adjust pipeline features
The example file [custom_convert.py](./examples/custom_convert.py) contains multiple ways The example file [custom_convert.py](./examples/custom_convert.py) contains multiple ways