From 5ac2887e4ad52ed6e7147e3af1e3ee5eb0006a70 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Mon, 3 Feb 2025 14:38:38 +0100 Subject: [PATCH] fix(markdown): fix parsing if doc ending with table (#873) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling/backend/md_backend.py | 1 + tests/data/groundtruth/docling_v2/ending_with_table.md.md | 6 ++++++ tests/data/md/ending_with_table.md | 6 ++++++ 3 files changed, 13 insertions(+) create mode 100644 tests/data/groundtruth/docling_v2/ending_with_table.md.md create mode 100644 tests/data/md/ending_with_table.md diff --git a/docling/backend/md_backend.py b/docling/backend/md_backend.py index 669096e..eaf4753 100644 --- a/docling/backend/md_backend.py +++ b/docling/backend/md_backend.py @@ -368,6 +368,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend): # Start iterating from the root of the AST self.iterate_elements(parsed_ast, 0, doc, None) self.process_inline_text(None, doc) # handle last hanging inline text + self.close_table(doc=doc) # handle any last hanging table # if HTML blocks were detected, export to HTML and delegate to HTML backend if self._html_blocks > 0: diff --git a/tests/data/groundtruth/docling_v2/ending_with_table.md.md b/tests/data/groundtruth/docling_v2/ending_with_table.md.md new file mode 100644 index 0000000..9c179fe --- /dev/null +++ b/tests/data/groundtruth/docling_v2/ending_with_table.md.md @@ -0,0 +1,6 @@ +| Character | Name in German | Name in French | Name in Italian | +|----------------|------------------|------------------|-------------------| +| Scrooge McDuck | Dagobert Duck | Balthazar Picsou | Paperone | +| Huey | Tick | Riri | Qui | +| Dewey | Trick | Fifi | Quo | +| Louie | Track | Loulou | Qua | diff --git a/tests/data/md/ending_with_table.md b/tests/data/md/ending_with_table.md new file mode 100644 index 0000000..6c491d6 --- /dev/null +++ b/tests/data/md/ending_with_table.md @@ -0,0 +1,6 @@ +| Character | Name in German | Name in French | Name in Italian | +|---|---|---|---| +| Scrooge McDuck | Dagobert Duck | Balthazar Picsou | Paperone | +| Huey | Tick | Riri | Qui | +| Dewey | Trick | Fifi | Quo | +| Louie | Track | Loulou | Qua |