fix(asciidoc): set default size when missing in image directive (#1769)

The AsciiDoc backend should not create an ImageRef whose Size is None; when an image directive does not specify both width and height, it now uses default size values instead.
Refactor the helper methods that do not use instance state into static methods by dropping the unused self parameter and adding the staticmethod decorator.
Extend the regression test for this fix.

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis 2025-06-16 10:38:46 +02:00 committed by GitHub
parent 7d3302cb48
commit b886e4df31
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 114 additions and 19 deletions

View File

@ -2,7 +2,7 @@ import logging
import re import re
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Set, Union from typing import Final, Set, Union
from docling_core.types.doc import ( from docling_core.types.doc import (
DocItemLabel, DocItemLabel,
@ -22,6 +22,9 @@ from docling.datamodel.document import InputDocument
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
DEFAULT_IMAGE_WIDTH: Final = 128
DEFAULT_IMAGE_HEIGHT: Final = 128
class AsciiDocBackend(DeclarativeDocumentBackend): class AsciiDocBackend(DeclarativeDocumentBackend):
def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]): def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]):
@ -200,9 +203,11 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
item = self._parse_picture(line) item = self._parse_picture(line)
size = None size: Size
if "width" in item and "height" in item: if "width" in item and "height" in item:
size = Size(width=int(item["width"]), height=int(item["height"])) size = Size(width=int(item["width"]), height=int(item["height"]))
else:
size = Size(width=DEFAULT_IMAGE_WIDTH, height=DEFAULT_IMAGE_HEIGHT)
uri = None uri = None
if ( if (
@ -264,14 +269,16 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
return doc return doc
def _get_current_level(self, parents): @staticmethod
def _get_current_level(parents):
for k, v in parents.items(): for k, v in parents.items():
if v is None and k > 0: if v is None and k > 0:
return k - 1 return k - 1
return 0 return 0
def _get_current_parent(self, parents): @staticmethod
def _get_current_parent(parents):
for k, v in parents.items(): for k, v in parents.items():
if v is None and k > 0: if v is None and k > 0:
return parents[k - 1] return parents[k - 1]
@ -279,17 +286,21 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
return None return None
# ========= Title # ========= Title
def _is_title(self, line): @staticmethod
def _is_title(line):
return re.match(r"^= ", line) return re.match(r"^= ", line)
def _parse_title(self, line): @staticmethod
def _parse_title(line):
return {"type": "title", "text": line[2:].strip(), "level": 0} return {"type": "title", "text": line[2:].strip(), "level": 0}
# ========= Section headers # ========= Section headers
def _is_section_header(self, line): @staticmethod
def _is_section_header(line):
return re.match(r"^==+\s+", line) return re.match(r"^==+\s+", line)
def _parse_section_header(self, line): @staticmethod
def _parse_section_header(line):
match = re.match(r"^(=+)\s+(.*)", line) match = re.match(r"^(=+)\s+(.*)", line)
marker = match.group(1) # The list marker (e.g., "*", "-", "1.") marker = match.group(1) # The list marker (e.g., "*", "-", "1.")
@ -303,10 +314,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
} }
# ========= Lists # ========= Lists
def _is_list_item(self, line): @staticmethod
def _is_list_item(line):
return re.match(r"^(\s)*(\*|-|\d+\.|\w+\.) ", line) return re.match(r"^(\s)*(\*|-|\d+\.|\w+\.) ", line)
def _parse_list_item(self, line): @staticmethod
def _parse_list_item(line):
"""Extract the item marker (number or bullet symbol) and the text of the item.""" """Extract the item marker (number or bullet symbol) and the text of the item."""
match = re.match(r"^(\s*)(\*|-|\d+\.)\s+(.*)", line) match = re.match(r"^(\s*)(\*|-|\d+\.)\s+(.*)", line)
@ -342,14 +355,17 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
} }
# ========= Tables # ========= Tables
def _is_table_line(self, line): @staticmethod
def _is_table_line(line):
return re.match(r"^\|.*\|", line) return re.match(r"^\|.*\|", line)
def _parse_table_line(self, line): @staticmethod
def _parse_table_line(line):
# Split table cells and trim extra spaces # Split table cells and trim extra spaces
return [cell.strip() for cell in line.split("|") if cell.strip()] return [cell.strip() for cell in line.split("|") if cell.strip()]
def _populate_table_as_grid(self, table_data): @staticmethod
def _populate_table_as_grid(table_data):
num_rows = len(table_data) num_rows = len(table_data)
# Adjust the table data into a grid format # Adjust the table data into a grid format
@ -380,10 +396,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
return data return data
# ========= Pictures # ========= Pictures
def _is_picture(self, line): @staticmethod
def _is_picture(line):
return re.match(r"^image::", line) return re.match(r"^image::", line)
def _parse_picture(self, line): @staticmethod
def _parse_picture(line):
""" """
Parse an image macro, extracting its path and attributes. Parse an image macro, extracting its path and attributes.
Syntax: image::path/to/image.png[Alt Text, width=200, height=150, align=center] Syntax: image::path/to/image.png[Alt Text, width=200, height=150, align=center]
@ -406,10 +424,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
return {"type": "picture", "uri": line} return {"type": "picture", "uri": line}
# ========= Captions # ========= Captions
def _is_caption(self, line): @staticmethod
def _is_caption(line):
return re.match(r"^\.(.+)", line) return re.match(r"^\.(.+)", line)
def _parse_caption(self, line): @staticmethod
def _parse_caption(line):
mtch = re.match(r"^\.(.+)", line) mtch = re.match(r"^\.(.+)", line)
if mtch: if mtch:
text = mtch.group(1) text = mtch.group(1)
@ -418,5 +438,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend):
return {"type": "caption", "text": ""} return {"type": "caption", "text": ""}
# ========= Plain text # ========= Plain text
def _parse_text(self, line): @staticmethod
def _parse_text(line):
return {"type": "text", "text": line.strip()} return {"type": "text", "text": line.strip()}

29
tests/data/asciidoc/test_03.asciidoc vendored Normal file
View File

@ -0,0 +1,29 @@
:_mod-docs-content-type: PROCEDURE
:experimental:
[id="renaming-a-bookmark_{context}"]
= Renaming a bookmark
You can rename a bookmark to distinguish it from other bookmarks. If you have bookmarks to several folders that all share the same name, you can tell the bookmarks apart if you rename them.
Renaming the bookmark does not rename the folder.
.Procedure
. Right-click the bookmark in the side bar.
. Select *Rename…*.
+
image::rename-bookmark-menu.png[Rename bookmark menu]
. In the *Name* field, enter the new name for the bookmark.
+
image::rename-bookmark-text.png[Bookmark name field]
. Click btn:[Rename].
.Verification
* Check that the side bar lists the bookmark under the new name.
+
image::renamed-bookmark.png[Renamed bookmark]

View File

@ -0,0 +1,23 @@
:\_mod-docs-content-type: PROCEDURE :experimental:
# Renaming a bookmark
[id="renaming-a-bookmark\_{context}"]
You can rename a bookmark to distinguish it from other bookmarks. If you have bookmarks to several folders that all share the same name, you can tell the bookmarks apart if you rename them.
Renaming the bookmark does not rename the folder.
- Check that the side bar lists the bookmark under the new name.
Procedure . Right-click the bookmark in the side bar. . Select *Rename…*. +
<!-- image -->
In the *Name* field, enter the new name for the bookmark. +
<!-- image -->
Click btn:[Rename]. .Verification
<!-- image -->

View File

@ -2,7 +2,11 @@ import glob
import os import os
from pathlib import Path from pathlib import Path
from docling.backend.asciidoc_backend import AsciiDocBackend from docling.backend.asciidoc_backend import (
DEFAULT_IMAGE_HEIGHT,
DEFAULT_IMAGE_WIDTH,
AsciiDocBackend,
)
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument from docling.datamodel.document import InputDocument
@ -18,6 +22,24 @@ def _get_backend(fname):
return doc_backend return doc_backend
def test_parse_picture():
line = (
"image::images/example1.png[Example Image, width=200, height=150, align=center]"
)
res = AsciiDocBackend._parse_picture(line)
assert res
assert res.get("width", 0) == "200"
assert res.get("height", 0) == "150"
assert res.get("uri", "") == "images/example1.png"
line = "image::renamed-bookmark.png[Renamed bookmark]"
res = AsciiDocBackend._parse_picture(line)
assert res
assert "width" not in res
assert "height" not in res
assert res.get("uri", "") == "renamed-bookmark.png"
def test_asciidocs_examples(): def test_asciidocs_examples():
fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc")) fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc"))