remove 'albumentations'
This commit is contained in:
parent
98b8ccc38d
commit
4edac82fc3
@ -1,4 +1,3 @@
|
||||
albumentations==1.4.0
|
||||
numpy==1.24.4
|
||||
omegaconf==2.3.0
|
||||
opencv-python==4.11.0.86
|
||||
|
@ -6,8 +6,11 @@ SPDX-License-Identifier: MIT
|
||||
import numpy as np
|
||||
import torch
|
||||
from PIL import ImageOps
|
||||
from torchvision import transforms
|
||||
from torchvision.transforms.functional import resize
|
||||
|
||||
from utils.utils import *
|
||||
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
|
||||
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
|
||||
|
||||
|
||||
class DolphinProcessor:
|
||||
@ -34,6 +37,10 @@ class DolphinProcessor:
|
||||
self.prefix_answer_space_flag = dp_config.get("prefix_answer_space_flag", True)
|
||||
self.suffix_prompt_space_flag = dp_config.get("suffix_prompt_space_flag", True)
|
||||
|
||||
self.transform = transforms.Compose(
|
||||
[transforms.ToTensor(), transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD)]
|
||||
)
|
||||
|
||||
def process_prompt_for_inference(self, prompt):
|
||||
prompt = prompt.replace("<image>\n", "")
|
||||
if not prompt.startswith("<s>"):
|
||||
@ -60,5 +67,5 @@ class DolphinProcessor:
|
||||
)
|
||||
image = ImageOps.expand(image, padding)
|
||||
if return_img_size:
|
||||
return test_transform(image).unsqueeze(0), (origin_w, origin_h)
|
||||
return test_transform(image).unsqueeze(0)
|
||||
return self.transform(image).unsqueeze(0), (origin_w, origin_h)
|
||||
return self.transform(image).unsqueeze(0)
|
||||
|
@ -4,21 +4,17 @@ SPDX-License-Identifier: MIT
|
||||
"""
|
||||
|
||||
import copy
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import io
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Tuple
|
||||
|
||||
import albumentations as alb
|
||||
import cv2
|
||||
import numpy as np
|
||||
from albumentations.pytorch import ToTensorV2
|
||||
import pymupdf
|
||||
from PIL import Image
|
||||
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
|
||||
from torchvision.transforms.functional import resize
|
||||
|
||||
from utils.markdown_utils import MarkdownConverter
|
||||
|
||||
@ -97,7 +93,7 @@ def convert_pdf_to_images(pdf_path, target_size=896):
|
||||
|
||||
def is_pdf_file(file_path):
|
||||
"""Check if file is a PDF"""
|
||||
return file_path.lower().endswith('.pdf')
|
||||
return file_path.lower().endswith(".pdf")
|
||||
|
||||
|
||||
def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
|
||||
@ -115,18 +111,14 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
|
||||
base_name = os.path.splitext(os.path.basename(pdf_path))[0]
|
||||
|
||||
# Prepare combined results
|
||||
combined_results = {
|
||||
"source_file": pdf_path,
|
||||
"total_pages": len(all_page_results),
|
||||
"pages": all_page_results
|
||||
}
|
||||
combined_results = {"source_file": pdf_path, "total_pages": len(all_page_results), "pages": all_page_results}
|
||||
|
||||
# Save combined JSON results
|
||||
json_filename = f"{base_name}.json"
|
||||
json_path = os.path.join(save_dir, "recognition_json", json_filename)
|
||||
os.makedirs(os.path.dirname(json_path), exist_ok=True)
|
||||
|
||||
with open(json_path, 'w', encoding='utf-8') as f:
|
||||
with open(json_path, "w", encoding="utf-8") as f:
|
||||
json.dump(combined_results, f, indent=2, ensure_ascii=False)
|
||||
|
||||
# Generate and save combined markdown
|
||||
@ -140,11 +132,9 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
|
||||
if page_elements:
|
||||
# Add page separator if not the first page
|
||||
if all_elements:
|
||||
all_elements.append({
|
||||
"label": "page_separator",
|
||||
"text": f"\n\n---\n\n",
|
||||
"reading_order": len(all_elements)
|
||||
})
|
||||
all_elements.append(
|
||||
{"label": "page_separator", "text": f"\n\n---\n\n", "reading_order": len(all_elements)}
|
||||
)
|
||||
all_elements.extend(page_elements)
|
||||
|
||||
# Generate markdown content
|
||||
@ -155,7 +145,7 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
|
||||
markdown_path = os.path.join(save_dir, "markdown", markdown_filename)
|
||||
os.makedirs(os.path.dirname(markdown_path), exist_ok=True)
|
||||
|
||||
with open(markdown_path, 'w', encoding='utf-8') as f:
|
||||
with open(markdown_path, "w", encoding="utf-8") as f:
|
||||
f.write(markdown_content)
|
||||
|
||||
# print(f"Combined markdown saved to: {markdown_path}")
|
||||
@ -169,23 +159,6 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
|
||||
return json_path
|
||||
|
||||
|
||||
def alb_wrapper(transform):
|
||||
def f(im):
|
||||
return transform(image=np.asarray(im))["image"]
|
||||
|
||||
return f
|
||||
|
||||
|
||||
test_transform = alb_wrapper(
|
||||
alb.Compose(
|
||||
[
|
||||
alb.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
|
||||
ToTensorV2(),
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True):
|
||||
# print(f"check_coord_valid: {x1}, {y1}, {x2}, {y2}, {image_size}, {abs_coord}")
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
@ -276,6 +249,7 @@ def parse_layout_string(bbox_str):
|
||||
@dataclass
|
||||
class ImageDimensions:
|
||||
"""Class to store image dimensions"""
|
||||
|
||||
original_w: int
|
||||
original_h: int
|
||||
padded_w: int
|
||||
@ -323,7 +297,12 @@ def map_to_relevant_coordinates(abs_coords, dims: ImageDimensions):
|
||||
"""
|
||||
try:
|
||||
x1, y1, x2, y2 = abs_coords
|
||||
return round(x1 / dims.original_w, 3), round(y1 / dims.original_h, 3), round(x2 / dims.original_w, 3), round(y2 / dims.original_h, 3)
|
||||
return (
|
||||
round(x1 / dims.original_w, 3),
|
||||
round(y1 / dims.original_h, 3),
|
||||
round(x2 / dims.original_w, 3),
|
||||
round(y2 / dims.original_h, 3),
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"map_to_relevant_coordinates error: {str(e)}")
|
||||
return 0.0, 0.0, 1.0, 1.0 # Return full image coordinates
|
||||
@ -389,9 +368,7 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
|
||||
new_previous_box = [x1, y1, x2, y2]
|
||||
|
||||
# Map to original coordinates
|
||||
orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates(
|
||||
x1, y1, x2, y2, dims
|
||||
)
|
||||
orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates(x1, y1, x2, y2, dims)
|
||||
|
||||
return x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box
|
||||
except Exception as e:
|
||||
@ -423,29 +400,18 @@ def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]:
|
||||
right = max_size - original_w - left
|
||||
|
||||
# Apply padding
|
||||
padded_image = cv2.copyMakeBorder(image, top, bottom, left, right,
|
||||
cv2.BORDER_CONSTANT, value=(0, 0, 0))
|
||||
padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
|
||||
|
||||
padded_h, padded_w = padded_image.shape[:2]
|
||||
|
||||
dimensions = ImageDimensions(
|
||||
original_w=original_w,
|
||||
original_h=original_h,
|
||||
padded_w=padded_w,
|
||||
padded_h=padded_h
|
||||
)
|
||||
dimensions = ImageDimensions(original_w=original_w, original_h=original_h, padded_w=padded_w, padded_h=padded_h)
|
||||
|
||||
return padded_image, dimensions
|
||||
except Exception as e:
|
||||
print(f"prepare_image error: {str(e)}")
|
||||
# Create a minimal valid image and dimensions
|
||||
h, w = image.height, image.width
|
||||
dimensions = ImageDimensions(
|
||||
original_w=w,
|
||||
original_h=h,
|
||||
padded_w=w,
|
||||
padded_h=h
|
||||
)
|
||||
dimensions = ImageDimensions(original_w=w, original_h=h, padded_w=w, padded_h=h)
|
||||
# Return a black image of the same size
|
||||
return np.zeros((h, w, 3), dtype=np.uint8), dimensions
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user