diff --git a/requirements.txt b/requirements.txt
index c37fbf5..5ca3a33 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-albumentations==1.4.0
 numpy==1.24.4
 omegaconf==2.3.0
 opencv-python==4.11.0.86
diff --git a/utils/processor.py b/utils/processor.py
index ba89d08..3f96b68 100644
--- a/utils/processor.py
+++ b/utils/processor.py
@@ -1,4 +1,4 @@
-""" 
+"""
 Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 SPDX-License-Identifier: MIT
 """
@@ -6,8 +6,11 @@ SPDX-License-Identifier: MIT
 import numpy as np
 import torch
 from PIL import ImageOps
+from torchvision import transforms
+from torchvision.transforms.functional import resize
 
-from utils.utils import *
+IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
 
 
 class DolphinProcessor:
@@ -34,6 +37,10 @@ class DolphinProcessor:
         self.prefix_answer_space_flag = dp_config.get("prefix_answer_space_flag", True)
         self.suffix_prompt_space_flag = dp_config.get("suffix_prompt_space_flag", True)
 
+        self.transform = transforms.Compose(
+            [transforms.ToTensor(), transforms.Normalize(mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD)]
+        )
+
     def process_prompt_for_inference(self, prompt):
         prompt = prompt.replace("<image>\n", "")
         if not prompt.startswith("<s>"):
@@ -60,5 +67,5 @@ class DolphinProcessor:
         )
         image = ImageOps.expand(image, padding)
         if return_img_size:
-            return test_transform(image).unsqueeze(0), (origin_w, origin_h)
-        return test_transform(image).unsqueeze(0)
+            return self.transform(image).unsqueeze(0), (origin_w, origin_h)
+        return self.transform(image).unsqueeze(0)
diff --git a/utils/utils.py b/utils/utils.py
index 423d0be..999a877 100644
--- a/utils/utils.py
+++ b/utils/utils.py
@@ -1,37 +1,33 @@
-""" 
+"""
 Copyright (c) 2025 Bytedance Ltd. and/or its affiliates
 SPDX-License-Identifier: MIT
 """
 
 import copy
+import io
 import json
 import os
-import io
 import re
 from dataclasses import dataclass
 from typing import List, Tuple
 
-import albumentations as alb
 import cv2
 import numpy as np
-from albumentations.pytorch import ToTensorV2
 import pymupdf
 from PIL import Image
-from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
-from torchvision.transforms.functional import resize
 
 from utils.markdown_utils import MarkdownConverter
 
 
 def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
     """Save cropped figure to local file system
-    
+
     Args:
         pil_crop: PIL Image object of the cropped figure
         save_dir: Base directory to save results
         image_name: Name of the source image/document
         reading_order: Reading order of the figure in the document
-        
+
     Returns:
         str: Filename of the saved figure
     """
@@ -39,17 +35,17 @@ def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
         # Create figures directory if it doesn't exist
         figures_dir = os.path.join(save_dir, "markdown", "figures")
         # os.makedirs(figures_dir, exist_ok=True)
-        
+
         # Generate figure filename
         figure_filename = f"{image_name}_figure_{reading_order:03d}.png"
         figure_path = os.path.join(figures_dir, figure_filename)
-        
+
         # Save the figure
         pil_crop.save(figure_path, format="PNG", quality=95)
-        
+
         # print(f"Saved figure: {figure_filename}")
         return figure_filename
-        
+
     except Exception as e:
         print(f"Error saving figure: {str(e)}")
         # Return a fallback filename
@@ -58,38 +54,38 @@ def save_figure_to_local(pil_crop, save_dir, image_name, reading_order):
 
 def convert_pdf_to_images(pdf_path, target_size=896):
     """Convert PDF pages to images
-    
+
     Args:
         pdf_path: Path to PDF file
         target_size: Target size for the longest dimension
-        
+
     Returns:
         List of PIL Images
     """
     images = []
     try:
         doc = pymupdf.open(pdf_path)
-        
+
         for page_num in range(len(doc)):
             page = doc[page_num]
-            
+
             # Calculate scale to make longest dimension equal to target_size
             rect = page.rect
             scale = target_size / max(rect.width, rect.height)
-            
+
             # Render page as image
             mat = pymupdf.Matrix(scale, scale)
             pix = page.get_pixmap(matrix=mat)
-            
+
             # Convert to PIL Image
             img_data = pix.tobytes("png")
             pil_image = Image.open(io.BytesIO(img_data))
             images.append(pil_image)
-        
+
         doc.close()
         print(f"Successfully converted {len(images)} pages from PDF")
         return images
-        
+
     except Exception as e:
         print(f"Error converting PDF to images: {str(e)}")
         return []
@@ -97,42 +93,38 @@ def convert_pdf_to_images(pdf_path, target_size=896):
 
 def is_pdf_file(file_path):
     """Check if file is a PDF"""
-    return file_path.lower().endswith('.pdf')
+    return file_path.lower().endswith(".pdf")
 
 
 def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
     """Save combined results for multi-page PDF with both JSON and Markdown
-    
+
     Args:
         all_page_results: List of results for all pages
         pdf_path: Path to original PDF file
         save_dir: Directory to save results
-        
+
     Returns:
         Path to saved combined JSON file
     """
     # Create output filename based on PDF name
     base_name = os.path.splitext(os.path.basename(pdf_path))[0]
-    
+
     # Prepare combined results
-    combined_results = {
-        "source_file": pdf_path,
-        "total_pages": len(all_page_results),
-        "pages": all_page_results
-    }
-    
+    combined_results = {"source_file": pdf_path, "total_pages": len(all_page_results), "pages": all_page_results}
+
     # Save combined JSON results
     json_filename = f"{base_name}.json"
     json_path = os.path.join(save_dir, "recognition_json", json_filename)
     os.makedirs(os.path.dirname(json_path), exist_ok=True)
-    
-    with open(json_path, 'w', encoding='utf-8') as f:
+
+    with open(json_path, "w", encoding="utf-8") as f:
         json.dump(combined_results, f, indent=2, ensure_ascii=False)
-    
+
     # Generate and save combined markdown
     try:
         markdown_converter = MarkdownConverter()
-        
+
         # Combine all page results into a single list for markdown conversion
         all_elements = []
         for page_data in all_page_results:
@@ -140,52 +132,33 @@ def save_combined_pdf_results(all_page_results, pdf_path, save_dir):
             if page_elements:
                 # Add page separator if not the first page
                 if all_elements:
-                    all_elements.append({
-                        "label": "page_separator",
-                        "text": f"\n\n---\n\n",
-                        "reading_order": len(all_elements)
-                    })
+                    all_elements.append(
+                        {"label": "page_separator", "text": f"\n\n---\n\n", "reading_order": len(all_elements)}
+                    )
                 all_elements.extend(page_elements)
-        
+
         # Generate markdown content
         markdown_content = markdown_converter.convert(all_elements)
-        
+
         # Save markdown file
         markdown_filename = f"{base_name}.md"
         markdown_path = os.path.join(save_dir, "markdown", markdown_filename)
         os.makedirs(os.path.dirname(markdown_path), exist_ok=True)
-        
-        with open(markdown_path, 'w', encoding='utf-8') as f:
+
+        with open(markdown_path, "w", encoding="utf-8") as f:
             f.write(markdown_content)
-            
+
         # print(f"Combined markdown saved to: {markdown_path}")
-        
+
     except ImportError:
         print("MarkdownConverter not available, skipping markdown generation")
     except Exception as e:
         print(f"Error generating markdown: {e}")
-    
+
     # print(f"Combined JSON results saved to: {json_path}")
     return json_path
 
 
-def alb_wrapper(transform):
-    def f(im):
-        return transform(image=np.asarray(im))["image"]
-
-    return f
-
-
-test_transform = alb_wrapper(
-    alb.Compose(
-        [
-            alb.Normalize(IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD),
-            ToTensorV2(),
-        ]
-    )
-)
-
-
 def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True):
     # print(f"check_coord_valid: {x1}, {y1}, {x2}, {y2}, {image_size}, {abs_coord}")
     if x2 <= x1 or y2 <= y1:
@@ -195,12 +168,12 @@ def check_coord_valid(x1, y1, x2, y2, image_size=None, abs_coord=True):
     if not abs_coord:
         if x2 > 1 or y2 > 1:
             return False, f"[{x1}, {y1}, {x2}, {y2}]"
-    elif image_size is not None: # has image size
+    elif image_size is not None:  # has image size
         if x2 > image_size[0] or y2 > image_size[1]:
             return False, f"[{x1}, {y1}, {x2}, {y2}]"
     return True, None
 
-    
+
 def adjust_box_edges(image, boxes: List[List[float]], max_pixels=15, threshold=0.2):
     """
     Image: cv2.image object, or Path
@@ -276,6 +249,7 @@ def parse_layout_string(bbox_str):
 @dataclass
 class ImageDimensions:
     """Class to store image dimensions"""
+
     original_w: int
     original_h: int
     padded_w: int
@@ -284,11 +258,11 @@ class ImageDimensions:
 
 def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[int, int, int, int]:
     """Map coordinates from padded image back to original image
-    
+
     Args:
         x1, y1, x2, y2: Coordinates in padded image
         dims: Image dimensions object
-        
+
     Returns:
         tuple: (x1, y1, x2, y2) coordinates in original image
     """
@@ -296,19 +270,19 @@ def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[
         # Calculate padding offsets
         top = (dims.padded_h - dims.original_h) // 2
         left = (dims.padded_w - dims.original_w) // 2
-        
+
         # Map back to original coordinates
         orig_x1 = max(0, x1 - left)
         orig_y1 = max(0, y1 - top)
         orig_x2 = min(dims.original_w, x2 - left)
         orig_y2 = min(dims.original_h, y2 - top)
-        
+
         # Ensure we have a valid box (width and height > 0)
         if orig_x2 <= orig_x1:
             orig_x2 = min(orig_x1 + 1, dims.original_w)
         if orig_y2 <= orig_y1:
             orig_y2 = min(orig_y1 + 1, dims.original_h)
-            
+
         return int(orig_x1), int(orig_y1), int(orig_x2), int(orig_y2)
     except Exception as e:
         print(f"map_to_original_coordinates error: {str(e)}")
@@ -318,12 +292,17 @@ def map_to_original_coordinates(x1, y1, x2, y2, dims: ImageDimensions) -> Tuple[
 
 def map_to_relevant_coordinates(abs_coords, dims: ImageDimensions):
     """
-        From absolute coordinates to relevant coordinates
-        e.g. [100, 100, 200, 200] -> [0.1, 0.2, 0.3, 0.4]
+    Convert absolute pixel coordinates to relative coordinates (fractions of the original width/height)
+    e.g. for a 1000x1000 original image: [100, 100, 200, 200] -> [0.1, 0.1, 0.2, 0.2]
     """
     try:
         x1, y1, x2, y2 = abs_coords
-        return round(x1 / dims.original_w, 3), round(y1 / dims.original_h, 3), round(x2 / dims.original_w, 3), round(y2 / dims.original_h, 3)
+        return (
+            round(x1 / dims.original_w, 3),
+            round(y1 / dims.original_h, 3),
+            round(x2 / dims.original_w, 3),
+            round(y2 / dims.original_h, 3),
+        )
     except Exception as e:
         print(f"map_to_relevant_coordinates error: {str(e)}")
         return 0.0, 0.0, 1.0, 1.0  # Return full image coordinates
@@ -331,13 +310,13 @@ def map_to_relevant_coordinates(abs_coords, dims: ImageDimensions):
 
 def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_box=None):
     """Process and adjust coordinates
-    
+
     Args:
         coords: Normalized coordinates [x1, y1, x2, y2]
         padded_image: Padded image
         dims: Image dimensions object
         previous_box: Previous box coordinates for overlap adjustment
-    
+
     Returns:
         tuple: (x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box)
     """
@@ -345,35 +324,35 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
         # Convert normalized coordinates to absolute coordinates
         x1, y1 = int(coords[0] * dims.padded_w), int(coords[1] * dims.padded_h)
         x2, y2 = int(coords[2] * dims.padded_w), int(coords[3] * dims.padded_h)
-        
+
         # Ensure coordinates are within image bounds before adjustment
         x1 = max(0, min(x1, dims.padded_w - 1))
         y1 = max(0, min(y1, dims.padded_h - 1))
         x2 = max(0, min(x2, dims.padded_w))
         y2 = max(0, min(y2, dims.padded_h))
-        
+
         # Ensure width and height are at least 1 pixel
         if x2 <= x1:
             x2 = min(x1 + 1, dims.padded_w)
         if y2 <= y1:
             y2 = min(y1 + 1, dims.padded_h)
-        
+
         # Extend box boundaries
         new_boxes = adjust_box_edges(padded_image, [[x1, y1, x2, y2]])
         x1, y1, x2, y2 = new_boxes[0]
-        
+
         # Ensure coordinates are still within image bounds after adjustment
         x1 = max(0, min(x1, dims.padded_w - 1))
         y1 = max(0, min(y1, dims.padded_h - 1))
         x2 = max(0, min(x2, dims.padded_w))
         y2 = max(0, min(y2, dims.padded_h))
-        
+
         # Ensure width and height are at least 1 pixel after adjustment
         if x2 <= x1:
             x2 = min(x1 + 1, dims.padded_w)
         if y2 <= y1:
             y2 = min(y1 + 1, dims.padded_h)
-        
+
         # Check for overlap with previous box and adjust
         if previous_box is not None:
             prev_x1, prev_y1, prev_x2, prev_y2 = previous_box
@@ -384,15 +363,13 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
                 # Make sure y2 is still greater than y1
                 if y2 <= y1:
                     y2 = min(y1 + 1, dims.padded_h)
-        
+
         # Update previous box
         new_previous_box = [x1, y1, x2, y2]
 
         # Map to original coordinates
-        orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates(
-            x1, y1, x2, y2, dims
-        )
-        
+        orig_x1, orig_y1, orig_x2, orig_y2 = map_to_original_coordinates(x1, y1, x2, y2, dims)
+
         return x1, y1, x2, y2, orig_x1, orig_y1, orig_x2, orig_y2, new_previous_box
     except Exception as e:
         print(f"process_coordinates error: {str(e)}")
@@ -403,10 +380,10 @@ def process_coordinates(coords, padded_image, dims: ImageDimensions, previous_bo
 
 def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]:
     """Load and prepare image with padding while maintaining aspect ratio
-    
+
     Args:
         image: PIL image
-        
+
     Returns:
         tuple: (padded_image, image_dimensions)
     """
@@ -423,29 +400,18 @@ def prepare_image(image) -> Tuple[np.ndarray, ImageDimensions]:
         right = max_size - original_w - left
 
         # Apply padding
-        padded_image = cv2.copyMakeBorder(image, top, bottom, left, right,
-                                cv2.BORDER_CONSTANT, value=(0, 0, 0))
+        padded_image = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0))
 
         padded_h, padded_w = padded_image.shape[:2]
-        
-        dimensions = ImageDimensions(
-            original_w=original_w,
-            original_h=original_h,
-            padded_w=padded_w,
-            padded_h=padded_h
-        )
-        
+
+        dimensions = ImageDimensions(original_w=original_w, original_h=original_h, padded_w=padded_w, padded_h=padded_h)
+
         return padded_image, dimensions
     except Exception as e:
         print(f"prepare_image error: {str(e)}")
         # Create a minimal valid image and dimensions
         h, w = image.height, image.width
-        dimensions = ImageDimensions(
-            original_w=w,
-            original_h=h,
-            padded_w=w,
-            padded_h=h
-        )
+        dimensions = ImageDimensions(original_w=w, original_h=h, padded_w=w, padded_h=h)
         # Return a black image of the same size
         return np.zeros((h, w, 3), dtype=np.uint8), dimensions
 
@@ -484,7 +450,7 @@ def crop_margin(img: Image.Image) -> Image.Image:
         if width == 0 or height == 0:
             print("Warning: Image has zero width or height")
             return img
-            
+
         data = np.array(img.convert("L"))
         data = data.astype(np.uint8)
         max_val = data.max()
@@ -498,13 +464,13 @@ def crop_margin(img: Image.Image) -> Image.Image:
         if coords is None:
             return img
         a, b, w, h = cv2.boundingRect(coords)  # Find minimum spanning bounding box
-        
+
         # Ensure crop coordinates are within image bounds
         a = max(0, a)
         b = max(0, b)
         w = min(w, width - a)
         h = min(h, height - b)
-        
+
         # Only crop if we have a valid region
         if w > 0 and h > 0:
             return img.crop((a, b, a + w, b + h))