diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py index 5328e8d..859646e 100644 --- a/docling/backend/asciidoc_backend.py +++ b/docling/backend/asciidoc_backend.py @@ -2,7 +2,7 @@ import logging import re from io import BytesIO from pathlib import Path -from typing import Set, Union +from typing import Final, Set, Union from docling_core.types.doc import ( DocItemLabel, @@ -22,6 +22,9 @@ from docling.datamodel.document import InputDocument _log = logging.getLogger(__name__) +DEFAULT_IMAGE_WIDTH: Final = 128 +DEFAULT_IMAGE_HEIGHT: Final = 128 + class AsciiDocBackend(DeclarativeDocumentBackend): def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]): @@ -200,9 +203,11 @@ class AsciiDocBackend(DeclarativeDocumentBackend): item = self._parse_picture(line) - size = None + size: Size if "width" in item and "height" in item: size = Size(width=int(item["width"]), height=int(item["height"])) + else: + size = Size(width=DEFAULT_IMAGE_WIDTH, height=DEFAULT_IMAGE_HEIGHT) uri = None if ( @@ -264,14 +269,16 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return doc - def _get_current_level(self, parents): + @staticmethod + def _get_current_level(parents): for k, v in parents.items(): if v is None and k > 0: return k - 1 return 0 - def _get_current_parent(self, parents): + @staticmethod + def _get_current_parent(parents): for k, v in parents.items(): if v is None and k > 0: return parents[k - 1] @@ -279,17 +286,21 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return None # ========= Title - def _is_title(self, line): + @staticmethod + def _is_title(line): return re.match(r"^= ", line) - def _parse_title(self, line): + @staticmethod + def _parse_title(line): return {"type": "title", "text": line[2:].strip(), "level": 0} # ========= Section headers - def _is_section_header(self, line): + @staticmethod + def _is_section_header(line): return re.match(r"^==+\s+", line) - def _parse_section_header(self, line): + @staticmethod + def _parse_section_header(line): match = re.match(r"^(=+)\s+(.*)", line) marker = match.group(1) # The list marker (e.g., "*", "-", "1.") @@ -303,10 +314,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend): } # ========= Lists - def _is_list_item(self, line): + @staticmethod + def _is_list_item(line): return re.match(r"^(\s)*(\*|-|\d+\.|\w+\.) ", line) - def _parse_list_item(self, line): + @staticmethod + def _parse_list_item(line): """Extract the item marker (number or bullet symbol) and the text of the item.""" match = re.match(r"^(\s*)(\*|-|\d+\.)\s+(.*)", line) @@ -342,14 +355,17 @@ class AsciiDocBackend(DeclarativeDocumentBackend): } # ========= Tables - def _is_table_line(self, line): + @staticmethod + def _is_table_line(line): return re.match(r"^\|.*\|", line) - def _parse_table_line(self, line): + @staticmethod + def _parse_table_line(line): # Split table cells and trim extra spaces return [cell.strip() for cell in line.split("|") if cell.strip()] - def _populate_table_as_grid(self, table_data): + @staticmethod + def _populate_table_as_grid(table_data): num_rows = len(table_data) # Adjust the table data into a grid format @@ -380,10 +396,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return data # ========= Pictures - def _is_picture(self, line): + @staticmethod + def _is_picture(line): return re.match(r"^image::", line) - def _parse_picture(self, line): + @staticmethod + def _parse_picture(line): """ Parse an image macro, extracting its path and attributes. Syntax: image::path/to/image.png[Alt Text, width=200, height=150, align=center] @@ -406,10 +424,12 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return {"type": "picture", "uri": line} # ========= Captions - def _is_caption(self, line): + @staticmethod + def _is_caption(line): return re.match(r"^\.(.+)", line) - def _parse_caption(self, line): + @staticmethod + def _parse_caption(line): mtch = re.match(r"^\.(.+)", line) if mtch: text = mtch.group(1) @@ -418,5 +438,6 @@ class AsciiDocBackend(DeclarativeDocumentBackend): return {"type": "caption", "text": ""} # ========= Plain text - def _parse_text(self, line): + @staticmethod + def _parse_text(line): return {"type": "text", "text": line.strip()} diff --git a/tests/data/asciidoc/test_03.asciidoc b/tests/data/asciidoc/test_03.asciidoc new file mode 100644 index 0000000..7ca6157 --- /dev/null +++ b/tests/data/asciidoc/test_03.asciidoc @@ -0,0 +1,29 @@ +:_mod-docs-content-type: PROCEDURE +:experimental: + +[id="renaming-a-bookmark_{context}"] += Renaming a bookmark + +You can rename a bookmark to distinguish it from other bookmarks. If you have bookmarks to several folders that all share the same name, you can tell the bookmarks apart if you rename them. + +Renaming the bookmark does not rename the folder. + +.Procedure + +. Right-click the bookmark in the side bar. + +. Select *Rename…*. ++ +image::rename-bookmark-menu.png[Rename bookmark menu] + +. In the *Name* field, enter the new name for the bookmark. ++ +image::rename-bookmark-text.png[Bookmark name field] + +. Click btn:[Rename]. + +.Verification + +* Check that the side bar lists the bookmark under the new name. ++ +image::renamed-bookmark.png[Renamed bookmark] \ No newline at end of file diff --git a/tests/data/groundtruth/docling_v2/test_03.asciidoc.md b/tests/data/groundtruth/docling_v2/test_03.asciidoc.md new file mode 100644 index 0000000..394405c --- /dev/null +++ b/tests/data/groundtruth/docling_v2/test_03.asciidoc.md @@ -0,0 +1,23 @@ +:\_mod-docs-content-type: PROCEDURE :experimental: + +# Renaming a bookmark + +[id="renaming-a-bookmark\_{context}"] + +You can rename a bookmark to distinguish it from other bookmarks. If you have bookmarks to several folders that all share the same name, you can tell the bookmarks apart if you rename them. + +Renaming the bookmark does not rename the folder. + +- Check that the side bar lists the bookmark under the new name. + +Procedure . Right-click the bookmark in the side bar. . Select *Rename…*. + + + + + In the *Name* field, enter the new name for the bookmark. + + + + + Click btn:[Rename]. .Verification + + \ No newline at end of file diff --git a/tests/test_backend_asciidoc.py b/tests/test_backend_asciidoc.py index fc047ba..4decdff 100644 --- a/tests/test_backend_asciidoc.py +++ b/tests/test_backend_asciidoc.py @@ -2,7 +2,11 @@ import glob import os from pathlib import Path -from docling.backend.asciidoc_backend import AsciiDocBackend +from docling.backend.asciidoc_backend import ( + DEFAULT_IMAGE_HEIGHT, + DEFAULT_IMAGE_WIDTH, + AsciiDocBackend, +) from docling.datamodel.base_models import InputFormat from docling.datamodel.document import InputDocument @@ -18,6 +22,24 @@ def _get_backend(fname): return doc_backend +def test_parse_picture(): + line = ( + "image::images/example1.png[Example Image, width=200, height=150, align=center]" + ) + res = AsciiDocBackend._parse_picture(line) + assert res + assert res.get("width", 0) == "200" + assert res.get("height", 0) == "150" + assert res.get("uri", "") == "images/example1.png" + + line = "image::renamed-bookmark.png[Renamed bookmark]" + res = AsciiDocBackend._parse_picture(line) + assert res + assert "width" not in res + assert "height" not in res + assert res.get("uri", "") == "renamed-bookmark.png" + + def test_asciidocs_examples(): fnames = sorted(glob.glob("./tests/data/asciidoc/*.asciidoc"))