feat: added HTTP header support for document converter and CLI (#642)
* added HTTP header support for document converter and CLI

  Signed-off-by: Luke Harrison <Luke.Harrison1@ibm.com>

* fixed formatting and typing issues

  Signed-off-by: Luke Harrison <Luke.Harrison1@ibm.com>

* use pydantic to parse dict, as suggested by @dolfim-ibm

  Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
  Signed-off-by: Luke Harrison <luke.harrison1@ibm.com>

---------

Signed-off-by: Luke Harrison <Luke.Harrison1@ibm.com>
Signed-off-by: Luke Harrison <luke.harrison1@ibm.com>
Co-authored-by: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com>
This commit is contained in:
@@ -164,6 +164,11 @@ def convert(
|
||||
to_formats: List[OutputFormat] = typer.Option(
|
||||
None, "--to", help="Specify output formats. Defaults to Markdown."
|
||||
),
|
||||
headers: str = typer.Option(
|
||||
None,
|
||||
"--headers",
|
||||
help="Specify http request headers used when fetching url input sources in the form of a JSON string",
|
||||
),
|
||||
image_export_mode: Annotated[
|
||||
ImageRefMode,
|
||||
typer.Option(
|
||||
@@ -279,12 +284,19 @@ def convert(
|
||||
if from_formats is None:
|
||||
from_formats = [e for e in InputFormat]
|
||||
|
||||
parsed_headers: Optional[Dict[str, str]] = None
|
||||
if headers is not None:
|
||||
headers_t = TypeAdapter(Dict[str, str])
|
||||
parsed_headers = headers_t.validate_json(headers)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tempdir:
|
||||
input_doc_paths: List[Path] = []
|
||||
for src in input_sources:
|
||||
try:
|
||||
# check if we can fetch some remote url
|
||||
source = resolve_source_to_path(source=src, workdir=Path(tempdir))
|
||||
source = resolve_source_to_path(
|
||||
source=src, headers=parsed_headers, workdir=Path(tempdir)
|
||||
)
|
||||
input_doc_paths.append(source)
|
||||
except FileNotFoundError:
|
||||
err_console.print(
|
||||
@@ -390,7 +402,7 @@ def convert(
|
||||
start_time = time.time()
|
||||
|
||||
conv_results = doc_converter.convert_all(
|
||||
input_doc_paths, raises_on_error=abort_on_error
|
||||
input_doc_paths, headers=parsed_headers, raises_on_error=abort_on_error
|
||||
)
|
||||
|
||||
output.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
Reference in New Issue
Block a user