feat: Introduce the enable_remote_services option to allow remote connections while processing (#941)

* feat: Introduce the allow_remote_services option to allow remote connections while processing

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* add option in the example

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* enhance docs

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* rename to enable_remote_services

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

---------

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-02-12 15:18:01 +01:00 committed by GitHub
parent 5101e2519e
commit 2716c7d4ff
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 58 additions and 5 deletions

View File

@ -234,6 +234,12 @@ def convert(
Optional[Path],
typer.Option(..., help="If provided, the location of the model artifacts."),
] = None,
enable_remote_services: Annotated[
bool,
typer.Option(
..., help="Must be enabled when using models connecting to remote services."
),
] = False,
abort_on_error: Annotated[
bool,
typer.Option(
@ -380,6 +386,7 @@ def convert(
accelerator_options = AcceleratorOptions(num_threads=num_threads, device=device)
pipeline_options = PdfPipelineOptions(
enable_remote_services=enable_remote_services,
accelerator_options=accelerator_options,
do_ocr=ocr,
ocr_options=ocr_options,

View File

@ -257,6 +257,7 @@ class PipelineOptions(BaseModel):
)
document_timeout: Optional[float] = None
accelerator_options: AcceleratorOptions = AcceleratorOptions()
enable_remote_services: bool = False
class PdfPipelineOptions(PipelineOptions):

View File

@ -4,3 +4,7 @@ class BaseError(RuntimeError):
class ConversionError(BaseError):
pass
class OperationNotAllowed(BaseError):
pass

View File

@ -8,6 +8,7 @@ from PIL import Image
from pydantic import BaseModel, ConfigDict
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
from docling.exceptions import OperationNotAllowed
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
_log = logging.getLogger(__name__)
@ -45,14 +46,20 @@ class ApiResponse(BaseModel):
class PictureDescriptionApiModel(PictureDescriptionBaseModel):
# elements_batch_size = 4
def __init__(self, enabled: bool, options: PictureDescriptionApiOptions):
def __init__(
self,
enabled: bool,
enable_remote_services: bool,
options: PictureDescriptionApiOptions,
):
super().__init__(enabled=enabled, options=options)
self.options: PictureDescriptionApiOptions
if self.enabled:
if options.url.host != "localhost":
raise NotImplementedError(
"The options try to connect to remote APIs which are not yet allowed."
if not enable_remote_services:
raise OperationNotAllowed(
"Connections to remote services is only allowed when set explicitly. "
"pipeline_options.enable_remote_services=True."
)
def _annotate_images(self, images: Iterable[Image.Image]) -> Iterable[str]:

View File

@ -209,6 +209,7 @@ class StandardPdfPipeline(PaginatedPipeline):
):
return PictureDescriptionApiModel(
enabled=self.pipeline_options.do_picture_description,
enable_remote_services=self.pipeline_options.enable_remote_services,
options=self.pipeline_options.picture_description_options,
)
elif isinstance(

View File

@ -20,7 +20,9 @@ def main():
# For example, you can launch it locally with:
# $ vllm serve "HuggingFaceTB/SmolVLM-256M-Instruct"
pipeline_options = PdfPipelineOptions()
pipeline_options = PdfPipelineOptions(
enable_remote_services=True # <-- this is required!
)
pipeline_options.do_picture_description = True
pipeline_options.picture_description_options = PictureDescriptionApiOptions(
url="http://localhost:8000/v1/chat/completions",

View File

@ -71,6 +71,37 @@ Or using the CLI:
docling --artifacts-path="/local/path/to/models" FILE
```
#### Using remote services
The main purpose of Docling is to run local models which are not sharing any user data with remote services.
Anyhow, there are valid use cases for processing part of the pipeline using remote services, for example invoking OCR engines from cloud vendors or the usage of hosted LLMs.
In Docling we decided to allow such models, but we require the user to explicitly opt-in in communicating with external services.
```py
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
pipeline_options = PdfPipelineOptions(enable_remote_services=True)
doc_converter = DocumentConverter(
format_options={
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
}
)
```
When the value `enable_remote_services=True` is not set, the system will raise an exception `OperationNotAllowed()`.
_Note: This option is only related to the system sending user data to remote services. Control of pulling data (e.g. model weights) follows the logic described in [Model prefetching and offline usage](#model-prefetching-and-offline-usage)._
##### List of remote model services
The options in this list require the explicit `enable_remote_services=True` when processing the documents.
- `PictureDescriptionApiOptions`: Using vision models via API calls.
#### Adjust pipeline features
The example file [custom_convert.py](./examples/custom_convert.py) contains multiple ways