fix: align output formats (#49)
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
053eae4bdf
commit
8cc147bc56
@ -88,7 +88,7 @@ class DocumentConverter:
|
||||
# Note: Pdfium backend is not thread-safe, so thread pool usage was disabled.
|
||||
yield from map(self.process_document, input_batch)
|
||||
|
||||
def convert_single(self, source: Path | AnyHttpUrl | str) -> Document:
|
||||
def convert_single(self, source: Path | AnyHttpUrl | str) -> ConvertedDocument:
|
||||
"""Convert a single document.
|
||||
|
||||
Args:
|
||||
@ -133,11 +133,10 @@ class DocumentConverter:
|
||||
converted_doc: ConvertedDocument = next(converted_docs_iter)
|
||||
if converted_doc.status not in {
|
||||
ConversionStatus.SUCCESS,
|
||||
ConversionStatus.SUCCESS_WITH_ERRORS,
|
||||
ConversionStatus.PARTIAL_SUCCESS,
|
||||
}:
|
||||
raise RuntimeError(f"Conversion failed with status: {converted_doc.status}")
|
||||
doc = converted_doc.to_ds_document()
|
||||
return doc
|
||||
return converted_doc
|
||||
|
||||
def process_document(self, in_doc: InputDocument) -> ConvertedDocument:
|
||||
start_doc_time = time.time()
|
||||
|
@ -1,8 +1,6 @@
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
source = "https://arxiv.org/pdf/2206.01062" # PDF path or URL
|
||||
source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
|
||||
converter = DocumentConverter()
|
||||
doc = converter.convert_single(source)
|
||||
print(
|
||||
doc.export_to_markdown()
|
||||
) # output: "## DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis [...]"
|
||||
print(doc.render_as_markdown()) # output: "## Docling Technical Report [...]"
|
||||
|
Loading…
Reference in New Issue
Block a user