From cc453961a9196c79f6428305b9007402e448f300 Mon Sep 17 00:00:00 2001
From: Zach Cox
Date: Wed, 30 Apr 2025 02:02:52 -0400
Subject: [PATCH] fix: enable cuda_use_flash_attention2 for
 PictureDescriptionVlmModel (#1496)

fix: enable use_cuda_flash_attention2 for PictureDescriptionVlmModel

Signed-off-by: Zach Cox
---
 docling/models/picture_description_vlm_model.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docling/models/picture_description_vlm_model.py b/docling/models/picture_description_vlm_model.py
index 374f575..679e80c 100644
--- a/docling/models/picture_description_vlm_model.py
+++ b/docling/models/picture_description_vlm_model.py
@@ -57,7 +57,10 @@ class PictureDescriptionVlmModel(PictureDescriptionBaseModel):
                 artifacts_path,
                 torch_dtype=torch.bfloat16,
                 _attn_implementation=(
-                    "flash_attention_2" if self.device.startswith("cuda") else "eager"
+                    "flash_attention_2"
+                    if self.device.startswith("cuda")
+                    and accelerator_options.cuda_use_flash_attention2
+                    else "eager"
                 ),
             ).to(self.device)
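
For context, the sketch below shows how a caller might opt into the behavior this patch gates. It is an illustrative sketch only: the flag name cuda_use_flash_attention2 comes from the diff above, while the import paths and surrounding objects (PdfPipelineOptions, AcceleratorOptions, AcceleratorDevice, DocumentConverter, PdfFormatOption, InputFormat) are assumed from docling's public pipeline-options API, and "example.pdf" is a hypothetical input.

    # Sketch only, not part of this patch. Assumes docling's pipeline-options API;
    # only cuda_use_flash_attention2 is confirmed by the diff above.
    from docling.datamodel.base_models import InputFormat
    from docling.datamodel.pipeline_options import (
        AcceleratorDevice,
        AcceleratorOptions,
        PdfPipelineOptions,
    )
    from docling.document_converter import DocumentConverter, PdfFormatOption

    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_picture_description = True  # enables PictureDescriptionVlmModel
    pipeline_options.accelerator_options = AcceleratorOptions(
        device=AcceleratorDevice.CUDA,
        # With this patch, the VLM is loaded with _attn_implementation="flash_attention_2"
        # only when running on CUDA *and* this flag is set; otherwise it falls back to "eager".
        cuda_use_flash_attention2=True,
    )

    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
        }
    )
    result = converter.convert("example.pdf")  # hypothetical input file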