feat: Add adaptive OCR, factor out treatment of OCR areas and cell filtering (#38)
* Introduce adaptive OCR

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Factor out BaseOcrModel, add docling-parse backend tests, fixes

* Make easyocr default dep

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
47b8ad917e
commit
e94d317c02
@ -18,6 +18,10 @@ class PdfPageBackend(ABC):
|
|||||||
def get_text_cells(self) -> Iterable["Cell"]:
|
def get_text_cells(self) -> Iterable["Cell"]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
def get_bitmap_rects(self, scale: int = 1) -> Iterable["BoundingBox"]:
    """Return the bounding boxes of the bitmap areas found on this page.

    Concrete page backends must override this method; the base version has
    no behaviour of its own.

    Args:
        scale: Scaling factor the implementation should apply to the boxes
            it reports (defaults to 1).

    Returns:
        An iterable of ``BoundingBox`` objects, one per bitmap area.
    """
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_page_image(
|
def get_page_image(
|
||||||
self, scale: int = 1, cropbox: Optional["BoundingBox"] = None
|
self, scale: int = 1, cropbox: Optional["BoundingBox"] = None
|
||||||
|
@ -3,7 +3,7 @@ import random
|
|||||||
import time
|
import time
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Optional, Union
|
from typing import Iterable, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
from docling_parse.docling_parse import pdf_parser
|
from docling_parse.docling_parse import pdf_parser
|
||||||
@ -43,7 +43,7 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
r=x1 * scale * page_size.width / parser_width,
|
r=x1 * scale * page_size.width / parser_width,
|
||||||
t=y1 * scale * page_size.height / parser_height,
|
t=y1 * scale * page_size.height / parser_height,
|
||||||
coord_origin=CoordOrigin.BOTTOMLEFT,
|
coord_origin=CoordOrigin.BOTTOMLEFT,
|
||||||
).to_top_left_origin(page_size.height * scale)
|
).to_top_left_origin(page_height=page_size.height * scale)
|
||||||
|
|
||||||
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
|
overlap_frac = cell_bbox.intersection_area_with(bbox) / cell_bbox.area()
|
||||||
|
|
||||||
@ -66,6 +66,12 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
for i in range(len(self._dpage["cells"])):
|
for i in range(len(self._dpage["cells"])):
|
||||||
rect = self._dpage["cells"][i]["box"]["device"]
|
rect = self._dpage["cells"][i]["box"]["device"]
|
||||||
x0, y0, x1, y1 = rect
|
x0, y0, x1, y1 = rect
|
||||||
|
|
||||||
|
if x1 < x0:
|
||||||
|
x0, x1 = x1, x0
|
||||||
|
if y1 < y0:
|
||||||
|
y0, y1 = y1, y0
|
||||||
|
|
||||||
text_piece = self._dpage["cells"][i]["content"]["rnormalized"]
|
text_piece = self._dpage["cells"][i]["content"]["rnormalized"]
|
||||||
cells.append(
|
cells.append(
|
||||||
Cell(
|
Cell(
|
||||||
@ -108,6 +114,20 @@ class DoclingParsePageBackend(PdfPageBackend):
|
|||||||
|
|
||||||
return cells
|
return cells
|
||||||
|
|
||||||
|
def get_bitmap_rects(self, scale: int = 1) -> Iterable[BoundingBox]:
    """Yield the bounding boxes of sufficiently large bitmap images on this page.

    Every entry of ``self._dpage["images"]`` supplies a ``"box"`` tuple that
    is read with a bottom-left origin and then converted to a top-left
    origin using the page height.

    Args:
        scale: Factor applied to each reported box via ``BoundingBox.scaled``.

    Yields:
        Top-left-origin boxes, scaled by ``scale``, for the images whose
        unscaled box area is greater than ``AREA_THRESHOLD``.
    """
    # Minimum (unscaled) box area an image needs in order to be reported.
    AREA_THRESHOLD = 32 * 32

    # The page height is the same for every image, so look it up only once.
    page_height = self.get_size().height

    for bitmap in self._dpage["images"]:
        cropbox = BoundingBox.from_tuple(
            bitmap["box"], origin=CoordOrigin.BOTTOMLEFT
        ).to_top_left_origin(page_height=page_height)

        # NOTE(review): the yield is assumed to belong inside this size check
        # (small images are dropped, not merely left unscaled) -- confirm.
        if cropbox.area() > AREA_THRESHOLD:
            yield cropbox.scaled(scale=scale)
|
||||||
|
|
||||||
def get_page_image(
|
def get_page_image(
|
||||||
self, scale: int = 1, cropbox: Optional[BoundingBox] = None
|
self, scale: int = 1, cropbox: Optional[BoundingBox] = None
|
||||||
) -> Image.Image:
|
) -> Image.Image:
|
||||||
@ -173,7 +193,7 @@ class DoclingParseDocumentBackend(PdfDocumentBackend):
|
|||||||
def page_count(self) -> int:
|
def page_count(self) -> int:
|
||||||
return len(self._parser_doc["pages"])
|
return len(self._parser_doc["pages"])
|
||||||
|
|
||||||
def load_page(self, page_no: int) -> PdfPage:
|
def load_page(self, page_no: int) -> DoclingParsePageBackend:
|
||||||
return DoclingParsePageBackend(
|
return DoclingParsePageBackend(
|
||||||
self._pdoc[page_no], self._parser_doc["pages"][page_no]
|
self._pdoc[page_no], self._parser_doc["pages"][page_no]
|
||||||
)
|
)
|
||||||
|
@ -4,6 +4,7 @@ from pathlib import Path
|
|||||||
from typing import Iterable, List, Optional, Union
|
from typing import Iterable, List, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
|
import pypdfium2.raw as pdfium_c
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pypdfium2 import PdfPage
|
from pypdfium2 import PdfPage
|
||||||
|
|
||||||
@ -17,6 +18,19 @@ class PyPdfiumPageBackend(PdfPageBackend):
|
|||||||
self._ppage = page_obj
|
self._ppage = page_obj
|
||||||
self.text_page = None
|
self.text_page = None
|
||||||
|
|
||||||
|
def get_bitmap_rects(self, scale: int = 1) -> Iterable[BoundingBox]:
    """Yield the bounding boxes of sufficiently large image objects on this page.

    The page's image objects are queried from pdfium; each object's position
    is read with a bottom-left origin and converted to a top-left origin
    using the page height.

    Args:
        scale: Factor applied to each reported box via ``BoundingBox.scaled``.

    Yields:
        Top-left-origin boxes, scaled by ``scale``, for the image objects
        whose unscaled box area is greater than 32 * 32.
    """
    min_area = 32 * 32  # area is compared before any scaling is applied

    image_objects = self._ppage.get_objects(filter=[pdfium_c.FPDF_PAGEOBJ_IMAGE])
    for image_obj in image_objects:
        bottom_left_box = BoundingBox.from_tuple(
            image_obj.get_pos(), origin=CoordOrigin.BOTTOMLEFT
        )
        top_left_box = bottom_left_box.to_top_left_origin(
            page_height=self.get_size().height
        )

        # NOTE(review): the yield is assumed to belong inside this size check
        # (small images are dropped, not merely left unscaled) -- confirm.
        if top_left_box.area() > min_area:
            yield top_left_box.scaled(scale=scale)
|
||||||
|
|
||||||
def get_text_in_rect(self, bbox: BoundingBox) -> str:
|
def get_text_in_rect(self, bbox: BoundingBox) -> str:
|
||||||
if not self.text_page:
|
if not self.text_page:
|
||||||
self.text_page = self._ppage.get_textpage()
|
self.text_page = self._ppage.get_textpage()
|
||||||
@ -208,7 +222,7 @@ class PyPdfiumDocumentBackend(PdfDocumentBackend):
|
|||||||
def page_count(self) -> int:
|
def page_count(self) -> int:
|
||||||
return len(self._pdoc)
|
return len(self._pdoc)
|
||||||
|
|
||||||
def load_page(self, page_no: int) -> PdfPage:
|
def load_page(self, page_no: int) -> PyPdfiumPageBackend:
|
||||||
return PyPdfiumPageBackend(self._pdoc[page_no])
|
return PyPdfiumPageBackend(self._pdoc[page_no])
|
||||||
|
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
|
@ -68,13 +68,21 @@ class BoundingBox(BaseModel):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_tuple(cls, coord: Tuple[float], origin: CoordOrigin):
|
def from_tuple(cls, coord: Tuple[float], origin: CoordOrigin):
|
||||||
if origin == CoordOrigin.TOPLEFT:
|
if origin == CoordOrigin.TOPLEFT:
|
||||||
return BoundingBox(
|
l, t, r, b = coord[0], coord[1], coord[2], coord[3]
|
||||||
l=coord[0], t=coord[1], r=coord[2], b=coord[3], coord_origin=origin
|
if r < l:
|
||||||
)
|
l, r = r, l
|
||||||
|
if b < t:
|
||||||
|
b, t = t, b
|
||||||
|
|
||||||
|
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
|
||||||
elif origin == CoordOrigin.BOTTOMLEFT:
|
elif origin == CoordOrigin.BOTTOMLEFT:
|
||||||
return BoundingBox(
|
l, b, r, t = coord[0], coord[1], coord[2], coord[3]
|
||||||
l=coord[0], b=coord[1], r=coord[2], t=coord[3], coord_origin=origin
|
if r < l:
|
||||||
)
|
l, r = r, l
|
||||||
|
if b > t:
|
||||||
|
b, t = t, b
|
||||||
|
|
||||||
|
return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin)
|
||||||
|
|
||||||
def area(self) -> float:
|
def area(self) -> float:
|
||||||
return (self.r - self.l) * (self.b - self.t)
|
return (self.r - self.l) * (self.b - self.t)
|
||||||
@ -280,7 +288,7 @@ class TableStructureOptions(BaseModel):
|
|||||||
|
|
||||||
class PipelineOptions(BaseModel):
|
class PipelineOptions(BaseModel):
|
||||||
do_table_structure: bool = True # True: perform table structure extraction
|
do_table_structure: bool = True # True: perform table structure extraction
|
||||||
do_ocr: bool = False # True: perform OCR, replace programmatic PDF text
|
do_ocr: bool = True # True: perform OCR, replace programmatic PDF text
|
||||||
|
|
||||||
table_structure_options: TableStructureOptions = TableStructureOptions()
|
table_structure_options: TableStructureOptions = TableStructureOptions()
|
||||||
|
|
||||||
|
@ -35,8 +35,6 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
class DocumentConverter:
|
class DocumentConverter:
|
||||||
_layout_model_path = "model_artifacts/layout/beehive_v0.0.5"
|
|
||||||
_table_model_path = "model_artifacts/tableformer"
|
|
||||||
_default_download_filename = "file.pdf"
|
_default_download_filename = "file.pdf"
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
|
124
docling/models/base_ocr_model.py
Normal file
124
docling/models/base_ocr_model.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
import copy
|
||||||
|
import logging
|
||||||
|
from abc import abstractmethod
|
||||||
|
from typing import Iterable, List, Tuple
|
||||||
|
|
||||||
|
import numpy
|
||||||
|
import numpy as np
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
from rtree import index
|
||||||
|
from scipy.ndimage import find_objects, label
|
||||||
|
|
||||||
|
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
|
||||||
|
|
||||||
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class BaseOcrModel:
    """Common base for OCR models.

    Provides the engine-independent parts of OCR handling: choosing which
    page regions to OCR, dropping OCR cells that collide with programmatic
    cells, and a debug visualisation. Subclasses supply ``__call__``.
    """

    def __init__(self, config):
        """Store the model configuration.

        Args:
            config: Mapping of model options; it must contain ``"enabled"``.
        """
        self.config = config
        self.enabled = config["enabled"]

    def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
        """Compute the rectangles of ``page`` that should be sent to OCR.

        The bitmap rectangles reported by the page backend are rasterised
        into a binary mask, touching/overlapping rectangles are merged into
        connected components, and the covered fraction of the page is
        measured.

        Args:
            page: Page whose ``_backend`` provides ``get_bitmap_rects()`` and
                whose ``size`` has ``width`` and ``height``.

        Returns:
            A single full-page box when more than 75% of the page is covered
            by bitmaps; otherwise one box per connected bitmap region
            (possibly an empty list). All boxes use a top-left origin.
        """
        BITMAP_COVERAGE_THRESHOLD = 0.75

        def find_ocr_rects(size, bitmap_rects):
            """Return ``(covered_fraction, merged_boxes)`` for the given rects."""
            # '1' mode is a binary (1 bit per pixel) image.
            image = Image.new("1", (round(size.width), round(size.height)))

            # Draw all bitmap rects into the binary image.
            # NOTE(review): assumes the rects share the coordinate space of
            # ``size`` (top-left origin, unscaled) -- confirm against backend.
            draw = ImageDraw.Draw(image)
            for rect in bitmap_rects:
                x0, y0, x1, y1 = (round(v) for v in rect.as_tuple())
                draw.rectangle([(x0, y0), (x1, y1)], fill=1)

            covered = np.array(image) > 0

            # Label the connected regions of *set* pixels, i.e. the areas
            # that were filled by bitmap rects above.
            labeled_image, _ = label(covered)

            # One (row_slice, col_slice) pair per connected component. Slice
            # stops are exclusive, hence the ``- 1`` for inclusive corners.
            bounding_boxes = [
                BoundingBox(
                    l=slc[1].start,
                    t=slc[0].start,
                    r=slc[1].stop - 1,
                    b=slc[0].stop - 1,
                    coord_origin=CoordOrigin.TOPLEFT,
                )
                for slc in find_objects(labeled_image)
            ]

            # Fraction of the page area covered by bitmaps.
            area_frac = np.sum(covered) / (size.width * size.height)

            return (area_frac, bounding_boxes)

        bitmap_rects = page._backend.get_bitmap_rects()
        coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)

        # Return a full-page rectangle if sufficiently covered with bitmaps.
        if coverage > BITMAP_COVERAGE_THRESHOLD:
            return [
                BoundingBox(
                    l=0,
                    t=0,
                    r=page.size.width,
                    b=page.size.height,
                    coord_origin=CoordOrigin.TOPLEFT,
                )
            ]

        # Otherwise return the individual rectangles. This also covers a
        # coverage exactly equal to the threshold, so callers always get a
        # list to iterate over and never ``None``.
        return ocr_rects

    def filter_ocr_cells(self, ocr_cells, programmatic_cells):
        """Drop every OCR cell that intersects an existing programmatic cell.

        Args:
            ocr_cells: Cells produced by OCR; each needs ``bbox.as_tuple()``.
            programmatic_cells: Cells already on the page; each needs
                ``bbox.as_tuple()``.

        Returns:
            List of the OCR cells whose box intersects no programmatic cell.
        """
        # Create an R-tree index over the programmatic cells.
        # NOTE(review): assumes ``as_tuple()`` yields (left, top, right,
        # bottom) with left <= right and top <= bottom, as rtree expects
        # (minx, miny, maxx, maxy) -- confirm.
        p = index.Property()
        p.dimension = 2
        idx = index.Index(properties=p)
        for i, cell in enumerate(programmatic_cells):
            idx.insert(i, cell.bbox.as_tuple())

        def is_overlapping_with_existing_cells(ocr_cell):
            # Any hit in the R-tree counts as an overlap; this is a weak
            # criterion (no minimum overlap area) but it works.
            possible_matches_index = list(idx.intersection(ocr_cell.bbox.as_tuple()))
            return len(possible_matches_index) > 0

        return [
            cell for cell in ocr_cells if not is_overlapping_with_existing_cells(cell)
        ]

    def draw_ocr_rects_and_cells(self, page, ocr_rects):
        """Debug helper: show the page image with OCR areas and cells overlaid.

        OCR rectangles are shaded in transparent yellow; cells are outlined
        in magenta (OCR cells) or red (all other cells). The result is
        displayed with ``Image.show()``; nothing is returned and ``page`` is
        not modified because a deep copy of its image is drawn on.
        """
        image = copy.deepcopy(page.image)
        draw = ImageDraw.Draw(image, "RGBA")

        # Draw OCR rectangles as yellow filled rects.
        shade_color = (255, 255, 0, 40)  # transparent yellow
        for rect in ocr_rects:
            x0, y0, x1, y1 = rect.as_tuple()
            draw.rectangle([(x0, y0), (x1, y1)], fill=shade_color, outline=None)

        # Draw OCR and programmatic cells.
        for tc in page.cells:
            x0, y0, x1, y1 = tc.bbox.as_tuple()
            color = "magenta" if isinstance(tc, OcrCell) else "red"
            draw.rectangle([(x0, y0), (x1, y1)], outline=color)
        image.show()

    # This class does not derive from ``ABC``, so the decorator marks the
    # intent but does not by itself prevent instantiation.
    @abstractmethod
    def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
        """Run OCR over a batch of pages; must be implemented by subclasses."""
        pass
|
@ -1,20 +1,18 @@
|
|||||||
import copy
|
|
||||||
import logging
|
import logging
|
||||||
import random
|
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
from PIL import ImageDraw
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
|
from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page
|
||||||
|
from docling.models.base_ocr_model import BaseOcrModel
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class EasyOcrModel:
|
class EasyOcrModel(BaseOcrModel):
|
||||||
def __init__(self, config):
|
def __init__(self, config):
|
||||||
self.config = config
|
super().__init__(config)
|
||||||
self.enabled = config["enabled"]
|
|
||||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
@ -29,8 +27,13 @@ class EasyOcrModel:
|
|||||||
return
|
return
|
||||||
|
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
# rects = page._fpage.
|
ocr_rects = self.get_ocr_rects(page)
|
||||||
high_res_image = page.get_image(scale=self.scale)
|
|
||||||
|
all_ocr_cells = []
|
||||||
|
for ocr_rect in ocr_rects:
|
||||||
|
high_res_image = page._backend.get_page_image(
|
||||||
|
scale=self.scale, cropbox=ocr_rect
|
||||||
|
)
|
||||||
im = numpy.array(high_res_image)
|
im = numpy.array(high_res_image)
|
||||||
result = self.reader.readtext(im)
|
result = self.reader.readtext(im)
|
||||||
|
|
||||||
@ -44,34 +47,24 @@ class EasyOcrModel:
|
|||||||
confidence=line[2],
|
confidence=line[2],
|
||||||
bbox=BoundingBox.from_tuple(
|
bbox=BoundingBox.from_tuple(
|
||||||
coord=(
|
coord=(
|
||||||
line[0][0][0] / self.scale,
|
(line[0][0][0] / self.scale) + ocr_rect.l,
|
||||||
line[0][0][1] / self.scale,
|
(line[0][0][1] / self.scale) + ocr_rect.t,
|
||||||
line[0][2][0] / self.scale,
|
(line[0][2][0] / self.scale) + ocr_rect.l,
|
||||||
line[0][2][1] / self.scale,
|
(line[0][2][1] / self.scale) + ocr_rect.t,
|
||||||
),
|
),
|
||||||
origin=CoordOrigin.TOPLEFT,
|
origin=CoordOrigin.TOPLEFT,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
for ix, line in enumerate(result)
|
for ix, line in enumerate(result)
|
||||||
]
|
]
|
||||||
|
all_ocr_cells.extend(cells)
|
||||||
|
|
||||||
page.cells = cells # For now, just overwrites all digital cells.
|
## Remove OCR cells which overlap with programmatic cells.
|
||||||
|
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
|
||||||
|
|
||||||
|
page.cells.extend(filtered_ocr_cells)
|
||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
def draw_clusters_and_cells():
|
# self.draw_ocr_rects_and_cells(page, ocr_rects)
|
||||||
image = copy.deepcopy(page.image)
|
|
||||||
draw = ImageDraw.Draw(image)
|
|
||||||
|
|
||||||
cell_color = (
|
|
||||||
random.randint(30, 140),
|
|
||||||
random.randint(30, 140),
|
|
||||||
random.randint(30, 140),
|
|
||||||
)
|
|
||||||
for tc in cells:
|
|
||||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
|
||||||
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
|
|
||||||
image.show()
|
|
||||||
|
|
||||||
# draw_clusters_and_cells()
|
|
||||||
|
|
||||||
yield page
|
yield page
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import copy
|
import copy
|
||||||
import random
|
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from abc import abstractmethod
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page, PipelineOptions
|
from docling.datamodel.base_models import PipelineOptions
|
||||||
from docling.models.easyocr_model import EasyOcrModel
|
from docling.models.easyocr_model import EasyOcrModel
|
||||||
from docling.models.layout_model import LayoutModel
|
from docling.models.layout_model import LayoutModel
|
||||||
from docling.models.page_assemble_model import PageAssembleModel
|
|
||||||
from docling.models.table_structure_model import TableStructureModel
|
from docling.models.table_structure_model import TableStructureModel
|
||||||
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
from docling.pipeline.base_model_pipeline import BaseModelPipeline
|
||||||
|
|
||||||
|
48
poetry.lock
generated
48
poetry.lock
generated
@ -966,7 +966,7 @@ pgp = ["gpg"]
|
|||||||
name = "easyocr"
|
name = "easyocr"
|
||||||
version = "1.7.1"
|
version = "1.7.1"
|
||||||
description = "End-to-End Multi-Lingual Optical Character Recognition (OCR) Solution"
|
description = "End-to-End Multi-Lingual Optical Character Recognition (OCR) Solution"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "easyocr-1.7.1-py3-none-any.whl", hash = "sha256:5b0a2e7cfdfc6c1ec99d9583663e570e4189dca6fbf373f074b21b8809e44d2b"},
|
{file = "easyocr-1.7.1-py3-none-any.whl", hash = "sha256:5b0a2e7cfdfc6c1ec99d9583663e570e4189dca6fbf373f074b21b8809e44d2b"},
|
||||||
@ -1336,7 +1336,7 @@ files = [
|
|||||||
name = "imageio"
|
name = "imageio"
|
||||||
version = "2.34.2"
|
version = "2.34.2"
|
||||||
description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
|
description = "Library for reading and writing a wide range of image, video, scientific, and volumetric data formats."
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.8"
|
python-versions = ">=3.8"
|
||||||
files = [
|
files = [
|
||||||
{file = "imageio-2.34.2-py3-none-any.whl", hash = "sha256:a0bb27ec9d5bab36a9f4835e51b21d2cb099e1f78451441f94687ff3404b79f8"},
|
{file = "imageio-2.34.2-py3-none-any.whl", hash = "sha256:a0bb27ec9d5bab36a9f4835e51b21d2cb099e1f78451441f94687ff3404b79f8"},
|
||||||
@ -1760,7 +1760,7 @@ files = [
|
|||||||
name = "lazy-loader"
|
name = "lazy-loader"
|
||||||
version = "0.4"
|
version = "0.4"
|
||||||
description = "Makes it easy to load subpackages and functions on demand."
|
description = "Makes it easy to load subpackages and functions on demand."
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
|
{file = "lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc"},
|
||||||
@ -2389,7 +2389,7 @@ files = [
|
|||||||
name = "ninja"
|
name = "ninja"
|
||||||
version = "1.11.1.1"
|
version = "1.11.1.1"
|
||||||
description = "Ninja is a small build system with a focus on speed"
|
description = "Ninja is a small build system with a focus on speed"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "ninja-1.11.1.1-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:376889c76d87b95b5719fdd61dd7db193aa7fd4432e5d52d2e44e4c497bdbbee"},
|
{file = "ninja-1.11.1.1-py2.py3-none-macosx_10_9_universal2.macosx_10_9_x86_64.macosx_11_0_arm64.macosx_11_0_universal2.whl", hash = "sha256:376889c76d87b95b5719fdd61dd7db193aa7fd4432e5d52d2e44e4c497bdbbee"},
|
||||||
@ -3132,7 +3132,7 @@ global = ["pybind11-global (==2.13.1)"]
|
|||||||
name = "pyclipper"
|
name = "pyclipper"
|
||||||
version = "1.3.0.post5"
|
version = "1.3.0.post5"
|
||||||
description = "Cython wrapper for the C++ translation of the Angus Johnson's Clipper library (ver. 6.4.2)"
|
description = "Cython wrapper for the C++ translation of the Angus Johnson's Clipper library (ver. 6.4.2)"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "pyclipper-1.3.0.post5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c45f99b8180dd4df4c86642657ca92b7d5289a5e3724521822e0f9461961fe2"},
|
{file = "pyclipper-1.3.0.post5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c45f99b8180dd4df4c86642657ca92b7d5289a5e3724521822e0f9461961fe2"},
|
||||||
@ -3535,7 +3535,7 @@ testing = ["filelock"]
|
|||||||
name = "python-bidi"
|
name = "python-bidi"
|
||||||
version = "0.6.0"
|
version = "0.6.0"
|
||||||
description = "Python Bidi layout wrapping the Rust crate unicode-bidi"
|
description = "Python Bidi layout wrapping the Rust crate unicode-bidi"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:032b16f70c5d4f48c8dc5a4ade071826a0fb64172e0435d49deba6ea66fc5d42"},
|
{file = "python_bidi-0.6.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:032b16f70c5d4f48c8dc5a4ade071826a0fb64172e0435d49deba6ea66fc5d42"},
|
||||||
@ -4155,11 +4155,30 @@ files = [
|
|||||||
{file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"},
|
{file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rtree"
|
||||||
|
version = "1.3.0"
|
||||||
|
description = "R-Tree spatial index for Python GIS"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "Rtree-1.3.0-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:80879d9db282a2273ca3a0d896c84583940e9777477727a277624ebfd424c517"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-macosx_11_0_arm64.whl", hash = "sha256:4328e9e421797c347e6eb08efbbade962fe3664ebd60c1dffe82c40911b1e125"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:037130d3ce1fc029de81941ec416ba5546f66228380ba19bb41f2ea1294e8423"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:864a05d0c3b7ce6c5e34378b7ab630057603b79179368bc50624258bdf2ff631"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ec2ed6d1635753dab966e68f592a9c4896f3f4ec6ad2b09b776d592eacd883a9"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b4485fb3e5c5e85b94a95f0a930a3848e040d2699cfb012940ba5b0130f1e09a"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-musllinux_1_2_i686.whl", hash = "sha256:7e2e9211f4fb404c06a08fd2cbebb03234214f73c51913bb371c3d9954e99cc9"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:c021f4772b25cc24915da8073e553ded6fa8d0b317caa4202255ed26b2344c1c"},
|
||||||
|
{file = "Rtree-1.3.0-py3-none-win_amd64.whl", hash = "sha256:97f835801d24c10bbf02381abe5e327345c8296ec711dde7658792376abafc66"},
|
||||||
|
{file = "rtree-1.3.0.tar.gz", hash = "sha256:b36e9dd2dc60ffe3d02e367242d2c26f7281b00e1aaf0c39590442edaaadd916"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "scikit-image"
|
name = "scikit-image"
|
||||||
version = "0.24.0"
|
version = "0.24.0"
|
||||||
description = "Image processing in Python"
|
description = "Image processing in Python"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "scikit_image-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb3bc0264b6ab30b43c4179ee6156bc18b4861e78bb329dd8d16537b7bbf827a"},
|
{file = "scikit_image-0.24.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cb3bc0264b6ab30b43c4179ee6156bc18b4861e78bb329dd8d16537b7bbf827a"},
|
||||||
@ -4207,7 +4226,7 @@ test = ["asv", "numpydoc (>=1.7)", "pooch (>=1.6.0)", "pytest (>=7.0)", "pytest-
|
|||||||
name = "scipy"
|
name = "scipy"
|
||||||
version = "1.14.0"
|
version = "1.14.0"
|
||||||
description = "Fundamental algorithms for scientific computing in Python"
|
description = "Fundamental algorithms for scientific computing in Python"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.10"
|
python-versions = ">=3.10"
|
||||||
files = [
|
files = [
|
||||||
{file = "scipy-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484"},
|
{file = "scipy-1.14.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7e911933d54ead4d557c02402710c2396529540b81dd554fc1ba270eb7308484"},
|
||||||
@ -4291,41 +4310,35 @@ test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "importlib-metadata
|
|||||||
name = "shapely"
|
name = "shapely"
|
||||||
version = "2.0.5"
|
version = "2.0.5"
|
||||||
description = "Manipulation and analysis of geometric objects"
|
description = "Manipulation and analysis of geometric objects"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.7"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "shapely-2.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89d34787c44f77a7d37d55ae821f3a784fa33592b9d217a45053a93ade899375"},
|
{file = "shapely-2.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89d34787c44f77a7d37d55ae821f3a784fa33592b9d217a45053a93ade899375"},
|
||||||
{file = "shapely-2.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:798090b426142df2c5258779c1d8d5734ec6942f778dab6c6c30cfe7f3bf64ff"},
|
{file = "shapely-2.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:798090b426142df2c5258779c1d8d5734ec6942f778dab6c6c30cfe7f3bf64ff"},
|
||||||
{file = "shapely-2.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45211276900c4790d6bfc6105cbf1030742da67594ea4161a9ce6812a6721e68"},
|
|
||||||
{file = "shapely-2.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e119444bc27ca33e786772b81760f2028d930ac55dafe9bc50ef538b794a8e1"},
|
{file = "shapely-2.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e119444bc27ca33e786772b81760f2028d930ac55dafe9bc50ef538b794a8e1"},
|
||||||
{file = "shapely-2.0.5-cp310-cp310-win32.whl", hash = "sha256:9a4492a2b2ccbeaebf181e7310d2dfff4fdd505aef59d6cb0f217607cb042fb3"},
|
{file = "shapely-2.0.5-cp310-cp310-win32.whl", hash = "sha256:9a4492a2b2ccbeaebf181e7310d2dfff4fdd505aef59d6cb0f217607cb042fb3"},
|
||||||
{file = "shapely-2.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:1e5cb5ee72f1bc7ace737c9ecd30dc174a5295fae412972d3879bac2e82c8fae"},
|
{file = "shapely-2.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:1e5cb5ee72f1bc7ace737c9ecd30dc174a5295fae412972d3879bac2e82c8fae"},
|
||||||
{file = "shapely-2.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5bbfb048a74cf273db9091ff3155d373020852805a37dfc846ab71dde4be93ec"},
|
{file = "shapely-2.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5bbfb048a74cf273db9091ff3155d373020852805a37dfc846ab71dde4be93ec"},
|
||||||
{file = "shapely-2.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93be600cbe2fbaa86c8eb70656369f2f7104cd231f0d6585c7d0aa555d6878b8"},
|
{file = "shapely-2.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93be600cbe2fbaa86c8eb70656369f2f7104cd231f0d6585c7d0aa555d6878b8"},
|
||||||
{file = "shapely-2.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f8e71bb9a46814019f6644c4e2560a09d44b80100e46e371578f35eaaa9da1c"},
|
|
||||||
{file = "shapely-2.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5251c28a29012e92de01d2e84f11637eb1d48184ee8f22e2df6c8c578d26760"},
|
{file = "shapely-2.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5251c28a29012e92de01d2e84f11637eb1d48184ee8f22e2df6c8c578d26760"},
|
||||||
{file = "shapely-2.0.5-cp311-cp311-win32.whl", hash = "sha256:35110e80070d664781ec7955c7de557456b25727a0257b354830abb759bf8311"},
|
{file = "shapely-2.0.5-cp311-cp311-win32.whl", hash = "sha256:35110e80070d664781ec7955c7de557456b25727a0257b354830abb759bf8311"},
|
||||||
{file = "shapely-2.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c6b78c0007a34ce7144f98b7418800e0a6a5d9a762f2244b00ea560525290c9"},
|
{file = "shapely-2.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c6b78c0007a34ce7144f98b7418800e0a6a5d9a762f2244b00ea560525290c9"},
|
||||||
{file = "shapely-2.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:03bd7b5fa5deb44795cc0a503999d10ae9d8a22df54ae8d4a4cd2e8a93466195"},
|
{file = "shapely-2.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:03bd7b5fa5deb44795cc0a503999d10ae9d8a22df54ae8d4a4cd2e8a93466195"},
|
||||||
{file = "shapely-2.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ff9521991ed9e201c2e923da014e766c1aa04771bc93e6fe97c27dcf0d40ace"},
|
{file = "shapely-2.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ff9521991ed9e201c2e923da014e766c1aa04771bc93e6fe97c27dcf0d40ace"},
|
||||||
{file = "shapely-2.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b65365cfbf657604e50d15161ffcc68de5cdb22a601bbf7823540ab4918a98d"},
|
|
||||||
{file = "shapely-2.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21f64e647a025b61b19585d2247137b3a38a35314ea68c66aaf507a1c03ef6fe"},
|
{file = "shapely-2.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21f64e647a025b61b19585d2247137b3a38a35314ea68c66aaf507a1c03ef6fe"},
|
||||||
{file = "shapely-2.0.5-cp312-cp312-win32.whl", hash = "sha256:3ac7dc1350700c139c956b03d9c3df49a5b34aaf91d024d1510a09717ea39199"},
|
{file = "shapely-2.0.5-cp312-cp312-win32.whl", hash = "sha256:3ac7dc1350700c139c956b03d9c3df49a5b34aaf91d024d1510a09717ea39199"},
|
||||||
{file = "shapely-2.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:30e8737983c9d954cd17feb49eb169f02f1da49e24e5171122cf2c2b62d65c95"},
|
{file = "shapely-2.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:30e8737983c9d954cd17feb49eb169f02f1da49e24e5171122cf2c2b62d65c95"},
|
||||||
{file = "shapely-2.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ff7731fea5face9ec08a861ed351734a79475631b7540ceb0b66fb9732a5f529"},
|
{file = "shapely-2.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:ff7731fea5face9ec08a861ed351734a79475631b7540ceb0b66fb9732a5f529"},
|
||||||
{file = "shapely-2.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ff9e520af0c5a578e174bca3c18713cd47a6c6a15b6cf1f50ac17dc8bb8db6a2"},
|
|
||||||
{file = "shapely-2.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b299b91557b04acb75e9732645428470825061f871a2edc36b9417d66c1fc5"},
|
{file = "shapely-2.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:49b299b91557b04acb75e9732645428470825061f871a2edc36b9417d66c1fc5"},
|
||||||
{file = "shapely-2.0.5-cp37-cp37m-win32.whl", hash = "sha256:b5870633f8e684bf6d1ae4df527ddcb6f3895f7b12bced5c13266ac04f47d231"},
|
{file = "shapely-2.0.5-cp37-cp37m-win32.whl", hash = "sha256:b5870633f8e684bf6d1ae4df527ddcb6f3895f7b12bced5c13266ac04f47d231"},
|
||||||
{file = "shapely-2.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:401cb794c5067598f50518e5a997e270cd7642c4992645479b915c503866abed"},
|
{file = "shapely-2.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:401cb794c5067598f50518e5a997e270cd7642c4992645479b915c503866abed"},
|
||||||
{file = "shapely-2.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e91ee179af539100eb520281ba5394919067c6b51824e6ab132ad4b3b3e76dd0"},
|
{file = "shapely-2.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e91ee179af539100eb520281ba5394919067c6b51824e6ab132ad4b3b3e76dd0"},
|
||||||
{file = "shapely-2.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8af6f7260f809c0862741ad08b1b89cb60c130ae30efab62320bbf4ee9cc71fa"},
|
{file = "shapely-2.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8af6f7260f809c0862741ad08b1b89cb60c130ae30efab62320bbf4ee9cc71fa"},
|
||||||
{file = "shapely-2.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5456dd522800306ba3faef77c5ba847ec30a0bd73ab087a25e0acdd4db2514f"},
|
|
||||||
{file = "shapely-2.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b714a840402cde66fd7b663bb08cacb7211fa4412ea2a209688f671e0d0631fd"},
|
{file = "shapely-2.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b714a840402cde66fd7b663bb08cacb7211fa4412ea2a209688f671e0d0631fd"},
|
||||||
{file = "shapely-2.0.5-cp38-cp38-win32.whl", hash = "sha256:7e8cf5c252fac1ea51b3162be2ec3faddedc82c256a1160fc0e8ddbec81b06d2"},
|
{file = "shapely-2.0.5-cp38-cp38-win32.whl", hash = "sha256:7e8cf5c252fac1ea51b3162be2ec3faddedc82c256a1160fc0e8ddbec81b06d2"},
|
||||||
{file = "shapely-2.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4461509afdb15051e73ab178fae79974387f39c47ab635a7330d7fee02c68a3f"},
|
{file = "shapely-2.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:4461509afdb15051e73ab178fae79974387f39c47ab635a7330d7fee02c68a3f"},
|
||||||
{file = "shapely-2.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7545a39c55cad1562be302d74c74586f79e07b592df8ada56b79a209731c0219"},
|
{file = "shapely-2.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7545a39c55cad1562be302d74c74586f79e07b592df8ada56b79a209731c0219"},
|
||||||
{file = "shapely-2.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4c83a36f12ec8dee2066946d98d4d841ab6512a6ed7eb742e026a64854019b5f"},
|
{file = "shapely-2.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4c83a36f12ec8dee2066946d98d4d841ab6512a6ed7eb742e026a64854019b5f"},
|
||||||
{file = "shapely-2.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89e640c2cd37378480caf2eeda9a51be64201f01f786d127e78eaeff091ec897"},
|
|
||||||
{file = "shapely-2.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06efe39beafde3a18a21dde169d32f315c57da962826a6d7d22630025200c5e6"},
|
{file = "shapely-2.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06efe39beafde3a18a21dde169d32f315c57da962826a6d7d22630025200c5e6"},
|
||||||
{file = "shapely-2.0.5-cp39-cp39-win32.whl", hash = "sha256:8203a8b2d44dcb366becbc8c3d553670320e4acf0616c39e218c9561dd738d92"},
|
{file = "shapely-2.0.5-cp39-cp39-win32.whl", hash = "sha256:8203a8b2d44dcb366becbc8c3d553670320e4acf0616c39e218c9561dd738d92"},
|
||||||
{file = "shapely-2.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:7fed9dbfbcfec2682d9a047b9699db8dcc890dfca857ecba872c42185fc9e64e"},
|
{file = "shapely-2.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:7fed9dbfbcfec2682d9a047b9699db8dcc890dfca857ecba872c42185fc9e64e"},
|
||||||
@ -4544,7 +4557,7 @@ files = [
|
|||||||
name = "tifffile"
|
name = "tifffile"
|
||||||
version = "2024.7.24"
|
version = "2024.7.24"
|
||||||
description = "Read and write TIFF files"
|
description = "Read and write TIFF files"
|
||||||
optional = true
|
optional = false
|
||||||
python-versions = ">=3.9"
|
python-versions = ">=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "tifffile-2024.7.24-py3-none-any.whl", hash = "sha256:f5cce1a915c37bc44ae4a792e3b42c07a30a3fa88406f5c6060a3de076487ed1"},
|
{file = "tifffile-2024.7.24-py3-none-any.whl", hash = "sha256:f5cce1a915c37bc44ae4a792e3b42c07a30a3fa88406f5c6060a3de076487ed1"},
|
||||||
@ -5105,10 +5118,9 @@ doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linke
|
|||||||
test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
|
test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
|
||||||
|
|
||||||
[extras]
|
[extras]
|
||||||
easyocr = ["easyocr"]
|
|
||||||
ocr = ["easyocr"]
|
ocr = ["easyocr"]
|
||||||
|
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "1b8f8f79c26b79a1421f9c587eb8972a4434cc2ea8d7112b97ebb56ab7cda845"
|
content-hash = "b2eabf8ecd0ce4a702875d0f785eac86d2cfe3c7d36c09b25d503ee31ea19bd4"
|
||||||
|
@ -31,9 +31,10 @@ pypdfium2 = "^4.30.0"
|
|||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
huggingface_hub = ">=0.23,<1"
|
huggingface_hub = ">=0.23,<1"
|
||||||
requests = "^2.32.3"
|
requests = "^2.32.3"
|
||||||
easyocr = { version = "^1.7", optional = true }
|
easyocr = { version = "^1.7"}
|
||||||
docling-parse = "^0.2.0"
|
docling-parse = "^0.2.0"
|
||||||
certifi = ">=2024.7.4"
|
certifi = ">=2024.7.4"
|
||||||
|
rtree = "^1.3.0"
|
||||||
|
|
||||||
[tool.poetry.group.dev.dependencies]
|
[tool.poetry.group.dev.dependencies]
|
||||||
black = {extras = ["jupyter"], version = "^24.4.2"}
|
black = {extras = ["jupyter"], version = "^24.4.2"}
|
||||||
@ -50,7 +51,6 @@ flake8-pyproject = "^1.2.3"
|
|||||||
pylint = "^2.17.5"
|
pylint = "^2.17.5"
|
||||||
|
|
||||||
[tool.poetry.extras]
|
[tool.poetry.extras]
|
||||||
easyocr = ["easyocr"]
|
|
||||||
ocr = ["easyocr"]
|
ocr = ["easyocr"]
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
33
test/test_backend_docling_parse.py
Normal file
33
test/test_backend_docling_parse.py
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend, DoclingParsePageBackend
|
||||||
|
from docling.datamodel.base_models import BoundingBox
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def test_doc_path():
    """Provide the relative path of the sample PDF used by the tests in this module."""
    sample_pdf = Path("./data/2206.01062.pdf")
    return sample_pdf
|
||||||
|
|
||||||
|
def test_get_text_from_rect(test_doc_path):
    """Check that the text extracted from a rectangle on page 0 equals the paper title."""
    expected_title = "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis"

    backend = DoclingParseDocumentBackend(test_doc_path)
    first_page: DoclingParsePageBackend = backend.load_page(0)

    # Rectangle enclosing the title text of the DocLayNet paper.
    title_region = BoundingBox(l=102, t=77, r=511, b=124)
    extracted = first_page.get_text_in_rect(bbox=title_region)

    # Surrounding whitespace is ignored in the comparison.
    assert extracted.strip() == expected_title
|
||||||
|
|
||||||
|
def test_crop_page_image(test_doc_path):
    """Smoke-test rendering a cropped region of page 0 at scale 2.

    No property of the returned image is asserted; the test only fails if
    loading the page or rendering the crop raises.
    """
    backend = DoclingParseDocumentBackend(test_doc_path)
    first_page: DoclingParsePageBackend = backend.load_page(0)

    # Rectangle around "Figure 1" of the DocLayNet paper.
    figure_region = BoundingBox(l=317, t=246, r=574, b=527)
    cropped = first_page.get_page_image(scale=2, cropbox=figure_region)
    # For a manual visual check during debugging, call cropped.show() here.
|
||||||
|
|
||||||
|
def test_num_pages(test_doc_path):
    """Check that the document backend reports 9 pages for the sample PDF."""
    doc_backend = DoclingParseDocumentBackend(test_doc_path)
    # The comparison has to be asserted: written as a bare expression its
    # result is discarded and the test would pass for any page count.
    assert doc_backend.page_count() == 9
|
Loading…
Reference in New Issue
Block a user