feat: add URL support to CLI (#99)

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-09-24 08:47:53 +02:00 committed by GitHub
parent c65a01c9b7
commit 3c46e4266c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -8,6 +8,7 @@ from pathlib import Path
from typing import Annotated, Iterable, List, Optional from typing import Annotated, Iterable, List, Optional
import typer import typer
from docling_core.utils.file import resolve_file_source
from pydantic import AnyUrl from pydantic import AnyUrl
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
@ -109,11 +110,11 @@ def export_documents(
@app.command(no_args_is_help=True) @app.command(no_args_is_help=True)
def convert( def convert(
input_sources: Annotated[ input_sources: Annotated[
List[Path], List[str],
typer.Argument( typer.Argument(
..., ...,
metavar="source", metavar="source",
help="PDF files to convert. Directories are also accepted.", help="PDF files to convert. Can be local file / directory paths or URL.",
), ),
], ],
export_json: Annotated[ export_json: Annotated[
@ -167,7 +168,8 @@ def convert(
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
input_doc_paths: List[Path] = [] input_doc_paths: List[Path] = []
for source in input_sources: for src in input_sources:
source = resolve_file_source(source=src)
if not source.exists(): if not source.exists():
err_console.print( err_console.print(
f"[red]Error: The input file {source} does not exist.[/red]" f"[red]Error: The input file {source} does not exist.[/red]"