chore: Safer unloading of DPv4 backend (#1867)

fix: Safer unloading of DPv4 backend

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2025-06-30 14:41:21 +02:00 committed by GitHub
parent ae39a9411a
commit bdfee4e2d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 1262 additions and 344 deletions

View File

@@ -187,7 +187,17 @@ class DoclingParseV4DocumentBackend(PdfDocumentBackend):
def unload(self):
# Release the resources held by this backend: the docling-parse document
# (self.dp_doc) and the pypdfium2 document (self._pdoc), after delegating
# to the parent class's unload().
# NOTE(review): this span is a rendered diff whose +/- markers and
# indentation were stripped. Given the hunk header (-187,7 +187,17), the
# four statements right after super().unload() look like the removed
# pre-change body and everything after them the added replacement —
# confirm against the repository before editing.
super().unload()
# presumably pre-change (removed) lines: unconditional unload/close with no
# None checks — verify against the actual diff
self.dp_doc.unload()
with pypdfium2_lock:
self._pdoc.close()
self._pdoc = None
# Unload docling-parse document first
# (guarded by a None check, and the reference is cleared afterwards)
if self.dp_doc is not None:
self.dp_doc.unload()
self.dp_doc = None
# Then close pypdfium2 document with proper locking
# (close() is only attempted while holding pypdfium2_lock)
if self._pdoc is not None:
with pypdfium2_lock:
try:
self._pdoc.close()
except Exception:
# Ignore cleanup errors
# NOTE(review): any exception raised by close() is discarded here without
# being logged; consider at least a debug-level log entry.
pass
self._pdoc = None

View File

@@ -46,6 +46,12 @@ def test_text_cell_counts():
)
last_cell_count = len(cells)
# Clean up page backend after each iteration
page_backend.unload()
# Explicitly clean up document backend to prevent race conditions in CI
doc_backend.unload()
def test_get_text_from_rect(test_doc_path):
doc_backend = _get_backend(test_doc_path)
@@ -59,6 +65,10 @@ def test_get_text_from_rect(test_doc_path):
assert textpiece.strip() == ref
# Explicitly clean up resources
page_backend.unload()
doc_backend.unload()
def test_crop_page_image(test_doc_path):
doc_backend = _get_backend(test_doc_path)
@@ -70,7 +80,14 @@ def test_crop_page_image(test_doc_path):
)
# im.show()
# Explicitly clean up resources
page_backend.unload()
doc_backend.unload()
def test_num_pages(test_doc_path):
    """Check that the backend reports 9 pages for the document at test_doc_path."""
    doc_backend = _get_backend(test_doc_path)
    try:
        # The original bare comparison discarded its result, so the test could
        # never fail; assert it so a wrong page count is actually reported.
        assert doc_backend.page_count() == 9
    finally:
        # Explicitly clean up resources to prevent race conditions in CI,
        # even when the assertion above fails.
        doc_backend.unload()

1571
uv.lock

File diff suppressed because it is too large Load Diff