From af323c04ef59bed23d5d7112ae8bf063635243aa Mon Sep 17 00:00:00 2001 From: Johnny Salazar Date: Mon, 4 Nov 2024 20:24:13 +0700 Subject: [PATCH] fix: Specify encoding when writing output file (#214) Specify the encoding when writing output files to avoid errors when the target system's default encoding cannot represent all characters. UTF-8 seems like the most universal and widely supported encoding. Otherwise, the CLI fails with encoding errors when the input file contains Unicode text (basically most files nowadays) and the target system has its default encoding set to some one-byte charset like cp1252. Signed-off-by: Johnny Salazar --- docling/cli/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docling/cli/main.py b/docling/cli/main.py index 1800ea1..f5e2e1a 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -90,28 +90,28 @@ def export_documents( # Export Deep Search document JSON format: if export_json: fname = output_dir / f"{doc_filename}.json" - with fname.open("w") as fp: + with fname.open("w", encoding="utf8") as fp: _log.info(f"writing JSON output to {fname}") fp.write(json.dumps(conv_res.document.export_to_dict())) # Export Text format: if export_txt: fname = output_dir / f"{doc_filename}.txt" - with fname.open("w") as fp: + with fname.open("w", encoding="utf8") as fp: _log.info(f"writing Text output to {fname}") fp.write(conv_res.document.export_to_markdown(strict_text=True)) # Export Markdown format: if export_md: fname = output_dir / f"{doc_filename}.md" - with fname.open("w") as fp: + with fname.open("w", encoding="utf8") as fp: _log.info(f"writing Markdown output to {fname}") fp.write(conv_res.document.export_to_markdown()) # Export Document Tags format: if export_doctags: fname = output_dir / f"{doc_filename}.doctags" - with fname.open("w") as fp: + with fname.open("w", encoding="utf8") as fp: _log.info(f"writing Doc Tags output to {fname}") fp.write(conv_res.document.export_to_document_tokens())