From 0b707d0882f5be42505871799387d0b1882bffbf Mon Sep 17 00:00:00 2001 From: Rafael Teixeira de Lima Date: Wed, 19 Mar 2025 10:34:25 +0100 Subject: [PATCH] fix(msword): Fixing function return in equations handling (#1194) * Fixing function return Signed-off-by: Rafael Teixeira de Lima * Add message Signed-off-by: Rafael Teixeira de Lima --------- Signed-off-by: Rafael Teixeira de Lima --- docling/backend/msword_backend.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/docling/backend/msword_backend.py b/docling/backend/msword_backend.py index 926ce08..78fe7df 100644 --- a/docling/backend/msword_backend.py +++ b/docling/backend/msword_backend.py @@ -275,8 +275,10 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): only_equations.append(latex_equation) texts_and_equations.append(latex_equation) - if "".join(only_texts) != text: - return text + if "".join(only_texts).strip() != text.strip(): + # If we are not able to reconstruct the initial raw text + # do not try to parse equations and return the original + return text, [] return "".join(texts_and_equations), only_equations @@ -365,6 +367,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend): for eq in equations: if len(text_tmp) == 0: break + pre_eq_text = text_tmp.split(eq, maxsplit=1)[0] text_tmp = text_tmp.split(eq, maxsplit=1)[1] if len(pre_eq_text) > 0: