remove 'albumentations'

This commit is contained in:
yingdong.han 2025-06-26 19:45:12 +08:00
parent 98b8ccc38d
commit 4edac82fc3
3 changed files with 84 additions and 112 deletions

View File

@ -1,4 +1,3 @@
albumentations==1.4.0
numpy==1.24.4 numpy==1.24.4
omegaconf==2.3.0 omegaconf==2.3.0
opencv-python==4.11.0.86 opencv-python==4.11.0.86

View File

@ -1,4 +1,4 @@
""" """
Copyright (c) 2025 Bytedance Ltd. and/or its affiliates Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
@ -6,8 +6,11 @@ SPDX-License-Identifier: MIT
import numpy as np import numpy as np
import torch import torch
from PIL import ImageOps from PIL import ImageOps
from torchvision import transforms
from torchvision.transforms.functional import resize
from utils.utils import * IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
class DolphinProcessor: class DolphinProcessor:
@ -34,6 +37,10 @@ class DolphinProcessor:
self.prefix_answer_space_flag = dp_config.get("prefix_answer_space_flag", True) self.prefix_answer_space_flag = dp_config.get("prefix_answer_space_flag", True)
self.suffix_prompt_space_flag = dp_config.get("suffix_prompt_space_flag", True) self.suffix_prompt_space_flag = dp_config.get("suffix_prompt_space_flag", True)
self.transform = transforms.Compose(
[transforms.ToTensor(), transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD)]
)
def process_prompt_for_inference(self, prompt): def process_prompt_for_inference(self, prompt):
prompt = prompt.replace("<image>\n", "") prompt = prompt.replace("<image>\n", "")
if not prompt.startswith("<s>"): if not prompt.startswith("<s>"):
@ -60,5 +67,5 @@ class DolphinProcessor:
) )
image = ImageOps.expand(image, padding) image = ImageOps.expand(image, padding)
if return_img_size: if return_img_size:
return test_transform(image).unsqueeze(0), (origin_w, origin_h) return self.transform(image).unsqueeze(0), (origin_w, origin_h)
return test_transform(image).unsqueeze(0) return self.transform(image).unsqueeze(0)

View File

@ -1,37 +1,33 @@
""" """
Copyright (c) 2025 Bytedance Ltd. and/or its affiliates Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
SPDX-License-Identifier: MIT SPDX-License-Identifier: MIT
""" """
import copy import copy
import io
import json import json
import os import os
import io
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Tuple from typing import List, Tuple
import albumentations as alb
import cv2 import cv2
import numpy as np import numpy as np
from albumentations.pytorch import ToTensorV2
import pymupdf import pymupdf
from PIL import Image from PIL import Image
from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from torchvision.transforms.functional import resize
from utils.markdown_utils import MarkdownConverter from utils.markdown_utils import MarkdownConverter
def save_figure_to_local(pil_crop, save_dir, image_name, reading_order): def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
"""Save cropped figure to local file system """Save cropped figure to local file system
Args: Args:
pil_crop: PIL Image object of the cropped figure pil_crop: PIL Image object of the cropped figure
save_dir: Base directory to save results save_dir: Base directory to save results
image_name: Name of the source image/document image_name: Name of the source image/document
reading_order: Reading order of the figure in the document reading_order: Reading order of the figure in the document
Returns: Returns:
str: Filename of the saved figure str: Filename of the saved figure
""" """
@ -39,17 +35,17 @@ def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
# Create figures directory if it doesn't exist # Create figures directory if it doesn't exist
figures_dir = os.path.join(save_dir, "markdown", "figures") figures_dir = os.path.join(save_dir, "markdown", "figures")
# os.makedirs(figures_dir, exist_ok=True) # os.makedirs(figures_dir, exist_ok=True)
# Generate figure filename # Generate figure filename
figure_filename = f"{image_name}_figure_{reading_order:03d}.png" figure_filename = f"{image_name}_figure_{reading_order:03d}.png"
figure_path = os.path.join(figures_dir, figure_filename) figure_path = os.path.join(figures_dir, figure_filename)
# Save the figure # Save the figure
pil_crop.save(figure_path, format="PNG", quality=95) pil_crop.save(figure_path, format="PNG", quality=95)
# print(f"Saved figure: {figure_filename}") # print(f"Saved figure: {figure_filename}")
return figure_filename return figure_filename
except Exception as e: except Exception as e:
print(f"Error saving figure: {str(e)}") print(f"Error saving figure: {str(e)}")
# Return a fallback filename # Return a fallback filename
@ -58,38 +54,38 @@ def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
def convert_pdf_to_images(pdf_path, target_size=896):
    """Convert PDF pages to images

    Every page is rendered with a uniform scale chosen so that its longest
    side maps to ``target_size``, then decoded into a PIL image.

    Args:
        pdf_path: Path to PDF file
        target_size: Target size for the longest dimension

    Returns:
        List of PIL Images, one per page in document order. An empty list is
        returned if opening or rendering fails (the error is printed, not raised).
    """
    images = []

    try:
        # The context manager guarantees the document is closed even when a
        # page fails to render part-way through the loop.
        with pymupdf.open(pdf_path) as doc:
            for page in doc:
                # Calculate scale to make longest dimension equal to target_size
                rect = page.rect
                scale = target_size / max(rect.width, rect.height)

                # Render page as image
                pix = page.get_pixmap(matrix=pymupdf.Matrix(scale, scale))

                # Convert to PIL Image by round-tripping through PNG bytes
                img_data = pix.tobytes("png")
                images.append(Image.open(io.BytesIO(img_data)))

        print(f"Successfully converted {len(images)} pages from PDF")
        return images

    except Exception as e:
        # Best-effort by design: callers receive an empty list instead of an exception.
        print(f"Error converting PDF to images: {str(e)}")
        return []
@ -97,42 +93,38 @@ def convert_pdf_to_images(pdf_path, target_size=896):
def is_pdf_file(file_path):
    """Check if file is a PDF, judged only by its extension.

    Args:
        file_path: Path as a ``str`` or an ``os.PathLike`` object
            (for example ``pathlib.Path``).

    Returns:
        bool: True if the path ends with ``.pdf`` (case-insensitive).
    """
    # Accept pathlib.Path and friends; plain strings pass through untouched.
    if isinstance(file_path, os.PathLike):
        file_path = os.fspath(file_path)
    return file_path.lower().endswith(".pdf")
def save_combined_pdf_results(all_page_results, pdf_path, save_dir): def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
"""Save combined results for multi-page PDF with both JSON and Markdown """Save combined results for multi-page PDF with both JSON and Markdown
Args: Args:
all_page_results: List of results for all pages all_page_results: List of results for all pages
pdf_path: Path to original PDF file pdf_path: Path to original PDF file
save_dir: Directory to save results save_dir: Directory to save results
Returns: Returns:
Path to saved combined JSON file Path to saved combined JSON file
""" """
# Create output filename based on PDF name # Create output filename based on PDF name
base_name = os.path.splitext(os.path.basename(pdf_path))[0] base_name = os.path.splitext(os.path.basename(pdf_path))[0]
# Prepare combined results # Prepare combined results
combined_results = { combined_results = {"source_file": pdf_path, "total_pages": len(all_page_results), "pages": all_page_results}
"source_file": pdf_path,
"total_pages": len(all_page_results),
"pages": all_page_results
}
# Save combined JSON results # Save combined JSON results
json_filename = f"{base_name}.json" json_filename = f"{base_name}.json"
json_path = os.path.join(save_dir, "recognition_json", json_filename) json_path = os.path.join(save_dir, "recognition_json", json_filename)
os.makedirs(os.path.dirname(json_path), exist_ok=True) os.makedirs(os.path.dirname(json_path), exist_ok=True)
with open(json_path, 'w', encoding='utf-8') as f: with open(json_path, "w", encoding="utf-8") as f:
json.dump(combined_results, f, indent=2, ensure_ascii=False) json.dump(combined_results, f, indent=2, ensure_ascii=False)
# Generate and save combined markdown # Generate and save combined markdown
try: try:
markdown_converter = MarkdownConverter() markdown_converter = MarkdownConverter()
# Combine all page results into a single list for markdown conversion # Combine all page results into a single list for markdown conversion
all_elements = [] all_elements = []
for page_data in all_page_results: for page_data in all_page_results:
@ -140,52 +132,33 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
if page_elements: if page_elements:
# Add page separator if not the first page # Add page separator if not the first page
if all_elements: if all_elements:
all_elements.append({ all_elements.append(
"label": "page_separator", {"label": "page_separator", "text": f"\n\n---\n\n", "reading_order": len(all_elements)}
"text": f"\n\n---\n\n", )
"reading_order": len(all_elements)
})
all_elements.extend(page_elements) all_elements.extend(page_elements)
# Generate markdown content # Generate markdown content
markdown_content = markdown_converter.convert(all_elements) markdown_content = markdown_converter.convert(all_elements)
# Save markdown file # Save markdown file
markdown_filename = f"{base_name}.md" markdown_filename = f"{base_name}.md"
markdown_path = os.path.join(save_dir, "markdown", markdown_filename) markdown_path = os.path.join(save_dir, "markdown", markdown_filename)
os.makedirs(os.path.dirname(markdown_path), exist_ok=True) os.makedirs(os.path.dirname(markdown_path), exist_ok=True)
with open(markdown_path, 'w', encoding='utf-8') as f: with open(markdown_path, "w", encoding="utf-8") as f:
f.write(markdown_content) f.write(markdown_content)
# print(f"Combined markdown saved to: {markdown_path}") # print(f"Combined markdown saved to: {markdown_path}")
except ImportError: except ImportError:
print("MarkdownConverter not available, skipping markdown generation") print("MarkdownConverter not available, skipping markdown generation")
except Exception as e: except Exception as e:
print(f"Error generating markdown: {e}") print(f"Error generating markdown: {e}")
# print(f"Combined JSON results saved to: {json_path}") # print(f"Combined JSON results saved to: {json_path}")
return json_path return json_path
def alb_wrapper(transform):
def f(im):
return transform(image=np.asarray(im))["image"]
return f
test_transform = alb_wrapper(
alb.Compose(
[
alb.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
ToTensorV2(),
]
)
)
def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True): def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True):
# print(f"check_coord_valid: {x1}, {y1}, {x2}, {y2}, {image_size}, {abs_coord}") # print(f"check_coord_valid: {x1}, {y1}, {x2}, {y2}, {image_size}, {abs_coord}")
if x2 <= x1 or y2 <= y1: if x2 <= x1 or y2 <= y1:
@ -195,12 +168,12 @@ def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True):
if not abs_coord: if not abs_coord:
if x2 > 1 or y2 > 1: if x2 > 1 or y2 > 1:
return False, f"[{x1}, {y1}, {x2}, {y2}]" return False, f"[{x1}, {y1}, {x2}, {y2}]"
elif image_size is not None: # has image size elif image_size is not None: # has image size
if x2 > image_size[0] or y2 > image_size[1]: if x2 > image_size[0] or y2 > image_size[1]:
return False, f"[{x1}, {y1}, {x2}, {y2}]" return False, f"[{x1}, {y1}, {x2}, {y2}]"
return True, None return True, None
def adjust_box_edges(image, boxes: List[List[float]], max_pixels=15, threshold=0.2): def adjust_box_edges(image, boxes: List[List[float]], max_pixels=15, threshold=0.2):
""" """
Image: cv2.image object, or Path Image: cv2.image object, or Path
@ -276,6 +249,7 @@ def parse_layout_string(bbox_str):
@dataclass @dataclass
class ImageDimensions: class ImageDimensions:
"""Class to store image dimensions""" """Class to store image dimensions"""
original_w: int original_w: int
original_h: int original_h: int
padded_w: int padded_w: int
@ -284,11 +258,11 @@ class ImageDimensions:
def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[int, int, int, int]: def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[int, int, int, int]:
"""Map coordinates from padded image back to original image """Map coordinates from padded image back to original image
Args: Args:
x1, y1, x2, y2: Coordinates in padded image x1, y1, x2, y2: Coordinates in padded image
dims: Image dimensions object dims: Image dimensions object
Returns: Returns:
tuple: (x1, y1, x2, y2) coordinates in original image tuple: (x1, y1, x2, y2) coordinates in original image
""" """
@ -296,19 +270,19 @@ def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[
# Calculate padding offsets # Calculate padding offsets
top = (dims.padded_h - dims.original_h) // 2 top = (dims.padded_h - dims.original_h) // 2
left = (dims.padded_w - dims.original_w) // 2 left = (dims.padded_w - dims.original_w) // 2
# Map back to original coordinates # Map back to original coordinates
orig_x1 = max(0, x1 - left) orig_x1 = max(0, x1 - left)
orig_y1 = max(0, y1 - top) orig_y1 = max(0, y1 - top)
orig_x2 = min(dims.original_w, x2 - left) orig_x2 = min(dims.original_w, x2 - left)
orig_y2 = min(dims.original_h, y2 - top) orig_y2 = min(dims.original_h, y2 - top)
# Ensure we have a valid box (width and height > 0) # Ensure we have a valid box (width and height > 0)
if orig_x2 <= orig_x1: if orig_x2 <= orig_x1:
orig_x2 = min(orig_x1 + 1, dims.original_w) orig_x2 = min(orig_x1 + 1, dims.original_w)
if orig_y2 <= orig_y1: if orig_y2 <= orig_y1:
orig_y2 = min(orig_y1 + 1, dims.original_h) orig_y2 = min(orig_y1 + 1, dims.original_h)
return int(orig_x1), int(orig_y1), int(orig_x2), int(orig_y2) return int(orig_x1), int(orig_y1), int(orig_x2), int(orig_y2)
except Exception as e: except Exception as e:
print(f"map_to_original_coordinates error: {str(e)}") print(f"map_to_original_coordinates error: {str(e)}")
@ -318,12 +292,17 @@ def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[
def map_to_relevant_coordinates(abs_coords, dims: ImageDimensions):
    """
    From absolute coordinates to relative (normalized) coordinates

    x values are divided by ``dims.original_w`` and y values by
    ``dims.original_h``; each result is rounded to 3 decimal places.
    e.g. for a 1000 x 500 original image: [100, 100, 200, 200] -> (0.1, 0.2, 0.2, 0.4)

    Args:
        abs_coords: Sequence of four numbers (x1, y1, x2, y2)
        dims: Image dimensions object (only original_w / original_h are read)

    Returns:
        tuple: (x1, y1, x2, y2) as fractions of the original width / height.
        On any error the full-image box (0.0, 0.0, 1.0, 1.0) is returned.
    """
    try:
        x1, y1, x2, y2 = abs_coords
        return (
            round(x1 / dims.original_w, 3),
            round(y1 / dims.original_h, 3),
            round(x2 / dims.original_w, 3),
            round(y2 / dims.original_h, 3),
        )
    except Exception as e:
        # Best-effort by design: malformed input or zero-sized dims fall back
        # to the whole image instead of raising.
        print(f"map_to_relevant_coordinates error: {e}")
        return 0.0, 0.0, 1.0, 1.0  # Return full image coordinates
@ -331,13 +310,13 @@ def map_to_relevant_coordinates(abs_coords, dims: ImageDimensions):
def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_box=None): def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_box=None):
"""Process and adjust coordinates """Process and adjust coordinates
Args: Args:
coords: Normalized coordinates [x1, y1, x2, y2] coords: Normalized coordinates [x1, y1, x2, y2]
padded_image: Padded image padded_image: Padded image
dims: Image dimensions object dims: Image dimensions object
previous_box: Previous box coordinates for overlap adjustment previous_box: Previous box coordinates for overlap adjustment
Returns: Returns:
tuple: (x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box) tuple: (x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box)
""" """
@ -345,35 +324,35 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
# Convert normalized coordinates to absolute coordinates # Convert normalized coordinates to absolute coordinates
x1, y1 = int(coords[0] * dims.padded_w), int(coords[1] * dims.padded_h) x1, y1 = int(coords[0] * dims.padded_w), int(coords[1] * dims.padded_h)
x2, y2 = int(coords[2] * dims.padded_w), int(coords[3] * dims.padded_h) x2, y2 = int(coords[2] * dims.padded_w), int(coords[3] * dims.padded_h)
# Ensure coordinates are within image bounds before adjustment # Ensure coordinates are within image bounds before adjustment
x1 = max(0, min(x1, dims.padded_w - 1)) x1 = max(0, min(x1, dims.padded_w - 1))
y1 = max(0, min(y1, dims.padded_h - 1)) y1 = max(0, min(y1, dims.padded_h - 1))
x2 = max(0, min(x2, dims.padded_w)) x2 = max(0, min(x2, dims.padded_w))
y2 = max(0, min(y2, dims.padded_h)) y2 = max(0, min(y2, dims.padded_h))
# Ensure width and height are at least 1 pixel # Ensure width and height are at least 1 pixel
if x2 <= x1: if x2 <= x1:
x2 = min(x1 + 1, dims.padded_w) x2 = min(x1 + 1, dims.padded_w)
if y2 <= y1: if y2 <= y1:
y2 = min(y1 + 1, dims.padded_h) y2 = min(y1 + 1, dims.padded_h)
# Extend box boundaries # Extend box boundaries
new_boxes = adjust_box_edges(padded_image, [[x1, y1, x2, y2]]) new_boxes = adjust_box_edges(padded_image, [[x1, y1, x2, y2]])
x1, y1, x2, y2 = new_boxes[0] x1, y1, x2, y2 = new_boxes[0]
# Ensure coordinates are still within image bounds after adjustment # Ensure coordinates are still within image bounds after adjustment
x1 = max(0, min(x1, dims.padded_w - 1)) x1 = max(0, min(x1, dims.padded_w - 1))
y1 = max(0, min(y1, dims.padded_h - 1)) y1 = max(0, min(y1, dims.padded_h - 1))
x2 = max(0, min(x2, dims.padded_w)) x2 = max(0, min(x2, dims.padded_w))
y2 = max(0, min(y2, dims.padded_h)) y2 = max(0, min(y2, dims.padded_h))
# Ensure width and height are at least 1 pixel after adjustment # Ensure width and height are at least 1 pixel after adjustment
if x2 <= x1: if x2 <= x1:
x2 = min(x1 + 1, dims.padded_w) x2 = min(x1 + 1, dims.padded_w)
if y2 <= y1: if y2 <= y1:
y2 = min(y1 + 1, dims.padded_h) y2 = min(y1 + 1, dims.padded_h)
# Check for overlap with previous box and adjust # Check for overlap with previous box and adjust
if previous_box is not None: if previous_box is not None:
prev_x1, prev_y1, prev_x2, prev_y2 = previous_box prev_x1, prev_y1, prev_x2, prev_y2 = previous_box
@ -384,15 +363,13 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
# Make sure y2 is still greater than y1 # Make sure y2 is still greater than y1
if y2 <= y1: if y2 <= y1:
y2 = min(y1 + 1, dims.padded_h) y2 = min(y1 + 1, dims.padded_h)
# Update previous box # Update previous box
new_previous_box = [x1, y1, x2, y2] new_previous_box = [x1, y1, x2, y2]
# Map to original coordinates # Map to original coordinates
orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates( orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates(x1, y1, x2, y2, dims)
x1, y1, x2, y2, dims
)
return x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box return x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box
except Exception as e: except Exception as e:
print(f"process_coordinates error: {str(e)}") print(f"process_coordinates error: {str(e)}")
@ -403,10 +380,10 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]: def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]:
"""Load and prepare image with padding while maintaining aspect ratio """Load and prepare image with padding while maintaining aspect ratio
Args: Args:
image: PIL image image: PIL image
Returns: Returns:
tuple: (padded_image, image_dimensions) tuple: (padded_image, image_dimensions)
""" """
@ -423,29 +400,18 @@ def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]:
right = max_size - original_w - left right = max_size - original_w - left
# Apply padding # Apply padding
padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
cv2.BORDER_CONSTANT, value=(0, 0, 0))
padded_h, padded_w = padded_image.shape[:2] padded_h, padded_w = padded_image.shape[:2]
dimensions = ImageDimensions( dimensions = ImageDimensions(original_w=original_w, original_h=original_h, padded_w=padded_w, padded_h=padded_h)
original_w=original_w,
original_h=original_h,
padded_w=padded_w,
padded_h=padded_h
)
return padded_image, dimensions return padded_image, dimensions
except Exception as e: except Exception as e:
print(f"prepare_image error: {str(e)}") print(f"prepare_image error: {str(e)}")
# Create a minimal valid image and dimensions # Create a minimal valid image and dimensions
h, w = image.height, image.width h, w = image.height, image.width
dimensions = ImageDimensions( dimensions = ImageDimensions(original_w=w, original_h=h, padded_w=w, padded_h=h)
original_w=w,
original_h=h,
padded_w=w,
padded_h=h
)
# Return a black image of the same size # Return a black image of the same size
return np.zeros((h, w, 3), dtype=np.uint8), dimensions return np.zeros((h, w, 3), dtype=np.uint8), dimensions
@ -484,7 +450,7 @@ def crop_margin(img: Image.Image) -> Image.Image:
if width == 0 or height == 0: if width == 0 or height == 0:
print("Warning: Image has zero width or height") print("Warning: Image has zero width or height")
return img return img
data = np.array(img.convert("L")) data = np.array(img.convert("L"))
data = data.astype(np.uint8) data = data.astype(np.uint8)
max_val = data.max() max_val = data.max()
@ -498,13 +464,13 @@ def crop_margin(img: Image.Image) -> Image.Image:
if coords is None: if coords is None:
return img return img
a, b, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box a, b, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box
# Ensure crop coordinates are within image bounds # Ensure crop coordinates are within image bounds
a = max(0, a) a = max(0, a)
b = max(0, b) b = max(0, b)
w = min(w, width - a) w = min(w, width - a)
h = min(h, height - b) h = min(h, height - b)
# Only crop if we have a valid region # Only crop if we have a valid region
if w > 0 and h > 0: if w > 0 and h > 0:
return img.crop((a, b, a + w, b + h)) return img.crop((a, b, a + w, b + h))