feat: Add pipeline timings and toggle visualization, establish debug settings (#183)

* Add settings to turn visualization on or off

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add profiling code to all models

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Refactor and fix profiling code

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Visualization code outputs PNG files to the debug dir

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fixes for time logging

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Optimize imports

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lockfile

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Add start_timestamps to ProfilingItem

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-10-30 15:04:19 +01:00 committed by GitHub
parent 94a5290789
commit 2a2c65bf4f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 998 additions and 771 deletions

View File

@ -1,24 +1,20 @@
import logging import logging
import os
import re import re
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Set, Union from typing import Set, Union
from docling_core.types.doc import ( from docling_core.types.doc import (
DocItem,
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,
DocumentOrigin, DocumentOrigin,
GroupItem, GroupItem,
GroupLabel, GroupLabel,
ImageRef, ImageRef,
NodeItem,
Size, Size,
TableCell, TableCell,
TableData, TableData,
) )
from pydantic import AnyUrl
from docling.backend.abstract_backend import DeclarativeDocumentBackend from docling.backend.abstract_backend import DeclarativeDocumentBackend
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat

View File

@ -1,6 +1,6 @@
from enum import Enum, auto from enum import Enum, auto
from io import BytesIO from io import BytesIO
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union from typing import TYPE_CHECKING, Dict, List, Optional, Union
from docling_core.types.doc import ( from docling_core.types.doc import (
BoundingBox, BoundingBox,

View File

@ -3,7 +3,7 @@ import re
from enum import Enum from enum import Enum
from io import BytesIO from io import BytesIO
from pathlib import Path, PurePath from pathlib import Path, PurePath
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type, Union from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Type, Union
import filetype import filetype
from docling_core.types.doc import ( from docling_core.types.doc import (
@ -52,6 +52,7 @@ from docling.datamodel.base_models import (
Page, Page,
) )
from docling.datamodel.settings import DocumentLimits from docling.datamodel.settings import DocumentLimits
from docling.utils.profiling import ProfilingItem
from docling.utils.utils import create_file_hash, create_hash from docling.utils.utils import create_file_hash, create_hash
if TYPE_CHECKING: if TYPE_CHECKING:
@ -187,6 +188,7 @@ class ConversionResult(BaseModel):
pages: List[Page] = [] pages: List[Page] = []
assembled: AssembledUnit = AssembledUnit() assembled: AssembledUnit = AssembledUnit()
timings: Dict[str, ProfilingItem] = {}
document: DoclingDocument = _EMPTY_DOCLING_DOC document: DoclingDocument = _EMPTY_DOCLING_DOC

View File

@ -1,4 +1,5 @@
import sys import sys
from pathlib import Path
from pydantic import BaseModel from pydantic import BaseModel
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
@ -26,8 +27,21 @@ class BatchConcurrencySettings(BaseModel):
# To force models into single core: export OMP_NUM_THREADS=1 # To force models into single core: export OMP_NUM_THREADS=1
class DebugSettings(BaseModel):
visualize_cells: bool = False
visualize_ocr: bool = False
visualize_layout: bool = False
visualize_tables: bool = False
profile_pipeline_timings: bool = False
# Path used to output debug information.
debug_output_path: str = str(Path.cwd() / "debug")
class AppSettings(BaseSettings): class AppSettings(BaseSettings):
perf: BatchConcurrencySettings perf: BatchConcurrencySettings
debug: DebugSettings
settings = AppSettings(perf=BatchConcurrencySettings()) settings = AppSettings(perf=BatchConcurrencySettings(), debug=DebugSettings())

View File

@ -189,24 +189,35 @@ class DocumentConverter:
) -> Iterator[ConversionResult]: ) -> Iterator[ConversionResult]:
assert self.format_to_options is not None assert self.format_to_options is not None
start_time = time.monotonic()
for input_batch in chunkify( for input_batch in chunkify(
conv_input.docs(self.format_to_options), conv_input.docs(self.format_to_options),
settings.perf.doc_batch_size, # pass format_options settings.perf.doc_batch_size, # pass format_options
): ):
_log.info(f"Going to convert document batch...") _log.info(f"Going to convert document batch...")
# parallel processing only within input_batch # parallel processing only within input_batch
# with ThreadPoolExecutor( # with ThreadPoolExecutor(
# max_workers=settings.perf.doc_batch_concurrency # max_workers=settings.perf.doc_batch_concurrency
# ) as pool: # ) as pool:
# yield from pool.map(self.process_document, input_batch) # yield from pool.map(self.process_document, input_batch)
# Note: PDF backends are not thread-safe, thread pool usage was disabled. # Note: PDF backends are not thread-safe, thread pool usage was disabled.
for item in map( for item in map(
partial(self._process_document, raises_on_error=raises_on_error), partial(self._process_document, raises_on_error=raises_on_error),
input_batch, input_batch,
): ):
elapsed = time.monotonic() - start_time
start_time = time.monotonic()
if item is not None: if item is not None:
_log.info(
f"Finished converting document {item.input.file.name} in {elapsed:.2f} sec."
)
yield item yield item
else:
_log.info(f"Skipped a document. We lost {elapsed:.2f} sec.")
def _get_pipeline(self, doc: InputDocument) -> Optional[BasePipeline]: def _get_pipeline(self, doc: InputDocument) -> Optional[BasePipeline]:
assert self.format_to_options is not None assert self.format_to_options is not None
@ -237,15 +248,8 @@ class DocumentConverter:
assert self.allowed_formats is not None assert self.allowed_formats is not None
assert in_doc.format in self.allowed_formats assert in_doc.format in self.allowed_formats
start_doc_time = time.time()
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error) conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
end_doc_time = time.time() - start_doc_time
_log.info(
f"Finished converting document {in_doc.file.name} in {end_doc_time:.2f} seconds."
)
return conv_res return conv_res
def _execute_pipeline( def _execute_pipeline(

View File

@ -4,11 +4,14 @@ from typing import Any, Iterable
from docling_core.types.doc import DoclingDocument, NodeItem from docling_core.types.doc import DoclingDocument, NodeItem
from docling.datamodel.base_models import Page from docling.datamodel.base_models import Page
from docling.datamodel.document import ConversionResult
class BasePageModel(ABC): class BasePageModel(ABC):
@abstractmethod @abstractmethod
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
pass pass

View File

@ -1,6 +1,7 @@
import copy import copy
import logging import logging
from abc import abstractmethod from abc import abstractmethod
from pathlib import Path
from typing import Iterable, List from typing import Iterable, List
import numpy as np import numpy as np
@ -10,12 +11,15 @@ from rtree import index
from scipy.ndimage import find_objects, label from scipy.ndimage import find_objects, label
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import OcrOptions from docling.datamodel.pipeline_options import OcrOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
class BaseOcrModel: class BaseOcrModel(BasePageModel):
def __init__(self, enabled: bool, options: OcrOptions): def __init__(self, enabled: bool, options: OcrOptions):
self.enabled = enabled self.enabled = enabled
self.options = options self.options = options
@ -113,7 +117,7 @@ class BaseOcrModel:
] ]
return filtered_ocr_cells return filtered_ocr_cells
def draw_ocr_rects_and_cells(self, page, ocr_rects): def draw_ocr_rects_and_cells(self, conv_res, page, ocr_rects, show: bool = False):
image = copy.deepcopy(page.image) image = copy.deepcopy(page.image)
draw = ImageDraw.Draw(image, "RGBA") draw = ImageDraw.Draw(image, "RGBA")
@ -130,8 +134,21 @@ class BaseOcrModel:
if isinstance(tc, OcrCell): if isinstance(tc, OcrCell):
color = "magenta" color = "magenta"
draw.rectangle([(x0, y0), (x1, y1)], outline=color) draw.rectangle([(x0, y0), (x1, y1)], outline=color)
image.show()
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = out_path / f"ocr_page_{page.page_no:05}.png"
image.save(str(out_file), format="png")
@abstractmethod @abstractmethod
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
pass pass

View File

@ -1,5 +1,6 @@
import copy import copy
import random import random
from pathlib import Path
from typing import List, Union from typing import List, Union
from deepsearch_glm.nlp_utils import init_nlp_model from deepsearch_glm.nlp_utils import init_nlp_model
@ -27,6 +28,8 @@ from pydantic import BaseModel, ConfigDict
from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
from docling.datamodel.document import ConversionResult, layout_label_to_ds_type from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
from docling.datamodel.settings import settings
from docling.utils.profiling import ProfilingScope, TimeRecorder
from docling.utils.utils import create_hash from docling.utils.utils import create_hash
@ -226,23 +229,24 @@ class GlmModel:
return ds_doc return ds_doc
def __call__(self, conv_res: ConversionResult) -> DoclingDocument: def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
ds_doc = self._to_legacy_document(conv_res) with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
ds_doc_dict = ds_doc.model_dump(by_alias=True) ds_doc = self._to_legacy_document(conv_res)
ds_doc_dict = ds_doc.model_dump(by_alias=True)
glm_doc = self.model.apply_on_doc(ds_doc_dict) glm_doc = self.model.apply_on_doc(ds_doc_dict)
docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental docling_doc: DoclingDocument = to_docling_document(glm_doc) # Experimental
# DEBUG code: # DEBUG code:
def draw_clusters_and_cells(ds_document, page_no): def draw_clusters_and_cells(ds_document, page_no, show: bool = False):
clusters_to_draw = [] clusters_to_draw = []
image = copy.deepcopy(conv_res.pages[page_no].image) image = copy.deepcopy(conv_res.pages[page_no].image)
for ix, elem in enumerate(ds_document.main_text): for ix, elem in enumerate(ds_document.main_text):
if isinstance(elem, BaseText): if isinstance(elem, BaseText):
prov = elem.prov[0] prov = elem.prov[0] # type: ignore
elif isinstance(elem, Ref): elif isinstance(elem, Ref):
_, arr, index = elem.ref.split("/") _, arr, index = elem.ref.split("/")
index = int(index) index = int(index) # type: ignore
if arr == "tables": if arr == "tables":
prov = ds_document.tables[index].prov[0] prov = ds_document.tables[index].prov[0]
elif arr == "figures": elif arr == "figures":
@ -256,7 +260,7 @@ class GlmModel:
id=ix, id=ix,
label=elem.name, label=elem.name,
bbox=BoundingBox.from_tuple( bbox=BoundingBox.from_tuple(
coord=prov.bbox, coord=prov.bbox, # type: ignore
origin=CoordOrigin.BOTTOMLEFT, origin=CoordOrigin.BOTTOMLEFT,
).to_top_left_origin(conv_res.pages[page_no].size.height), ).to_top_left_origin(conv_res.pages[page_no].size.height),
) )
@ -276,9 +280,21 @@ class GlmModel:
for tc in c.cells: # [:1]: for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple() x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
image.show()
# draw_clusters_and_cells(ds_doc, 0) if show:
# draw_clusters_and_cells(exported_doc, 0) image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = out_path / f"doc_page_{page_no:05}.png"
image.save(str(out_file), format="png")
# for item in ds_doc.page_dimensions:
# page_no = item.page
# draw_clusters_and_cells(ds_doc, page_no)
return docling_doc return docling_doc

View File

@ -5,8 +5,11 @@ import numpy
from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import EasyOcrOptions from docling.datamodel.pipeline_options import EasyOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -33,58 +36,65 @@ class EasyOcrModel(BaseOcrModel):
download_enabled=self.options.download_enabled, download_enabled=self.options.download_enabled,
) )
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled: if not self.enabled:
yield from page_batch yield from page_batch
return return
for page in page_batch: for page in page_batch:
assert page._backend is not None assert page._backend is not None
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
ocr_rects = self.get_ocr_rects(page) with TimeRecorder(conv_res, "ocr"):
ocr_rects = self.get_ocr_rects(page)
all_ocr_cells = [] all_ocr_cells = []
for ocr_rect in ocr_rects: for ocr_rect in ocr_rects:
# Skip zero area boxes # Skip zero area boxes
if ocr_rect.area() == 0: if ocr_rect.area() == 0:
continue continue
high_res_image = page._backend.get_page_image( high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect scale=self.scale, cropbox=ocr_rect
)
im = numpy.array(high_res_image)
result = self.reader.readtext(im)
del high_res_image
del im
cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
) )
for ix, line in enumerate(result) im = numpy.array(high_res_image)
] result = self.reader.readtext(im)
all_ocr_cells.extend(cells)
## Remove OCR cells which overlap with programmatic cells. del high_res_image
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells) del im
page.cells.extend(filtered_ocr_cells) cells = [
OcrCell(
id=ix,
text=line[1],
confidence=line[2],
bbox=BoundingBox.from_tuple(
coord=(
(line[0][0][0] / self.scale) + ocr_rect.l,
(line[0][0][1] / self.scale) + ocr_rect.t,
(line[0][2][0] / self.scale) + ocr_rect.l,
(line[0][2][1] / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
for ix, line in enumerate(result)
]
all_ocr_cells.extend(cells)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
)
page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page yield page

View File

@ -16,8 +16,11 @@ from docling.datamodel.base_models import (
LayoutPrediction, LayoutPrediction,
Page, Page,
) )
from docling.datamodel.document import ConversionResult
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.utils import layout_utils as lu from docling.utils import layout_utils as lu
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -271,74 +274,97 @@ class LayoutModel(BasePageModel):
return clusters_out_new, cells_out_new return clusters_out_new, cells_out_new
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for page in page_batch: for page in page_batch:
assert page._backend is not None assert page._backend is not None
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
assert page.size is not None with TimeRecorder(conv_res, "layout"):
assert page.size is not None
clusters = [] clusters = []
for ix, pred_item in enumerate( for ix, pred_item in enumerate(
self.layout_predictor.predict(page.get_image(scale=1.0)) self.layout_predictor.predict(page.get_image(scale=1.0))
): ):
label = DocItemLabel( label = DocItemLabel(
pred_item["label"].lower().replace(" ", "_").replace("-", "_") pred_item["label"]
) # Temporary, until docling-ibm-model uses docling-core types .lower()
cluster = Cluster( .replace(" ", "_")
id=ix, .replace("-", "_")
label=label, ) # Temporary, until docling-ibm-model uses docling-core types
confidence=pred_item["confidence"], cluster = Cluster(
bbox=BoundingBox.model_validate(pred_item), id=ix,
cells=[], label=label,
) confidence=pred_item["confidence"],
clusters.append(cluster) bbox=BoundingBox.model_validate(pred_item),
cells=[],
# Map cells to clusters
# TODO: Remove, postprocess should take care of it anyway.
for cell in page.cells:
for cluster in clusters:
if not cell.bbox.area() > 0:
overlap_frac = 0.0
else:
overlap_frac = (
cell.bbox.intersection_area_with(cluster.bbox)
/ cell.bbox.area()
)
if overlap_frac > 0.5:
cluster.cells.append(cell)
# Pre-sort clusters
# clusters = self.sort_clusters_by_cell_order(clusters)
# DEBUG code:
def draw_clusters_and_cells():
image = copy.deepcopy(page.image)
draw = ImageDraw.Draw(image)
for c in clusters:
x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
cell_color = (
random.randint(30, 140),
random.randint(30, 140),
random.randint(30, 140),
) )
for tc in c.cells: # [:1]: clusters.append(cluster)
x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color)
image.show()
# draw_clusters_and_cells() # Map cells to clusters
# TODO: Remove, postprocess should take care of it anyway.
for cell in page.cells:
for cluster in clusters:
if not cell.bbox.area() > 0:
overlap_frac = 0.0
else:
overlap_frac = (
cell.bbox.intersection_area_with(cluster.bbox)
/ cell.bbox.area()
)
clusters, page.cells = self.postprocess( if overlap_frac > 0.5:
clusters, page.cells, page.size.height cluster.cells.append(cell)
)
# draw_clusters_and_cells() # Pre-sort clusters
# clusters = self.sort_clusters_by_cell_order(clusters)
page.predictions.layout = LayoutPrediction(clusters=clusters) # DEBUG code:
def draw_clusters_and_cells(show: bool = False):
image = copy.deepcopy(page.image)
if image is not None:
draw = ImageDraw.Draw(image)
for c in clusters:
x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="green")
cell_color = (
random.randint(30, 140),
random.randint(30, 140),
random.randint(30, 140),
)
for tc in c.cells: # [:1]:
x0, y0, x1, y1 = tc.bbox.as_tuple()
draw.rectangle(
[(x0, y0), (x1, y1)], outline=cell_color
)
if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
out_file = (
out_path / f"layout_page_{page.page_no:05}.png"
)
image.save(str(out_file), format="png")
# draw_clusters_and_cells()
clusters, page.cells = self.postprocess(
clusters, page.cells, page.size.height
)
page.predictions.layout = LayoutPrediction(clusters=clusters)
if settings.debug.visualize_layout:
draw_clusters_and_cells()
yield page yield page

View File

@ -12,8 +12,10 @@ from docling.datamodel.base_models import (
Table, Table,
TextElement, TextElement,
) )
from docling.datamodel.document import ConversionResult
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.models.layout_model import LayoutModel from docling.models.layout_model import LayoutModel
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -51,122 +53,122 @@ class PageAssembleModel(BasePageModel):
return sanitized_text.strip() # Strip any leading or trailing whitespace return sanitized_text.strip() # Strip any leading or trailing whitespace
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for page in page_batch: for page in page_batch:
assert page._backend is not None assert page._backend is not None
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
assert page.predictions.layout is not None with TimeRecorder(conv_res, "page_assemble"):
# assembles some JSON output page by page. assert page.predictions.layout is not None
elements: List[PageElement] = [] # assembles some JSON output page by page.
headers: List[PageElement] = []
body: List[PageElement] = []
for cluster in page.predictions.layout.clusters: elements: List[PageElement] = []
# _log.info("Cluster label seen:", cluster.label) headers: List[PageElement] = []
if cluster.label in LayoutModel.TEXT_ELEM_LABELS: body: List[PageElement] = []
textlines = [ for cluster in page.predictions.layout.clusters:
cell.text.replace("\x02", "-").strip() # _log.info("Cluster label seen:", cluster.label)
for cell in cluster.cells if cluster.label in LayoutModel.TEXT_ELEM_LABELS:
if len(cell.text.strip()) > 0
]
text = self.sanitize_text(textlines)
text_el = TextElement(
label=cluster.label,
id=cluster.id,
text=text,
page_no=page.page_no,
cluster=cluster,
)
elements.append(text_el)
if cluster.label in LayoutModel.PAGE_HEADER_LABELS: textlines = [
headers.append(text_el) cell.text.replace("\x02", "-").strip()
else: for cell in cluster.cells
body.append(text_el) if len(cell.text.strip()) > 0
elif cluster.label == LayoutModel.TABLE_LABEL: ]
tbl = None text = self.sanitize_text(textlines)
if page.predictions.tablestructure: text_el = TextElement(
tbl = page.predictions.tablestructure.table_map.get(
cluster.id, None
)
if (
not tbl
): # fallback: add table without structure, if it isn't present
tbl = Table(
label=cluster.label, label=cluster.label,
id=cluster.id, id=cluster.id,
text="",
otsl_seq=[],
table_cells=[],
cluster=cluster,
page_no=page.page_no,
)
elements.append(tbl)
body.append(tbl)
elif cluster.label == LayoutModel.FIGURE_LABEL:
fig = None
if page.predictions.figures_classification:
fig = (
page.predictions.figures_classification.figure_map.get(
cluster.id, None
)
)
if (
not fig
): # fallback: add figure without classification, if it isn't present
fig = FigureElement(
label=cluster.label,
id=cluster.id,
text="",
data=None,
cluster=cluster,
page_no=page.page_no,
)
elements.append(fig)
body.append(fig)
elif cluster.label == LayoutModel.FORMULA_LABEL:
equation = None
if page.predictions.equations_prediction:
equation = (
page.predictions.equations_prediction.equation_map.get(
cluster.id, None
)
)
if (
not equation
): # fallback: add empty formula, if it isn't present
text = self.sanitize_text(
[
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
)
equation = TextElement(
label=cluster.label,
id=cluster.id,
cluster=cluster,
page_no=page.page_no,
text=text, text=text,
page_no=page.page_no,
cluster=cluster,
) )
elements.append(equation) elements.append(text_el)
body.append(equation)
page.assembled = AssembledUnit( if cluster.label in LayoutModel.PAGE_HEADER_LABELS:
elements=elements, headers=headers, body=body headers.append(text_el)
) else:
body.append(text_el)
elif cluster.label == LayoutModel.TABLE_LABEL:
tbl = None
if page.predictions.tablestructure:
tbl = page.predictions.tablestructure.table_map.get(
cluster.id, None
)
if (
not tbl
): # fallback: add table without structure, if it isn't present
tbl = Table(
label=cluster.label,
id=cluster.id,
text="",
otsl_seq=[],
table_cells=[],
cluster=cluster,
page_no=page.page_no,
)
# Remove page images (can be disabled) elements.append(tbl)
if not self.options.keep_images: body.append(tbl)
page._image_cache = {} elif cluster.label == LayoutModel.FIGURE_LABEL:
fig = None
if page.predictions.figures_classification:
fig = page.predictions.figures_classification.figure_map.get(
cluster.id, None
)
if (
not fig
): # fallback: add figure without classification, if it isn't present
fig = FigureElement(
label=cluster.label,
id=cluster.id,
text="",
data=None,
cluster=cluster,
page_no=page.page_no,
)
elements.append(fig)
body.append(fig)
elif cluster.label == LayoutModel.FORMULA_LABEL:
equation = None
if page.predictions.equations_prediction:
equation = page.predictions.equations_prediction.equation_map.get(
cluster.id, None
)
if (
not equation
): # fallback: add empty formula, if it isn't present
text = self.sanitize_text(
[
cell.text.replace("\x02", "-").strip()
for cell in cluster.cells
if len(cell.text.strip()) > 0
]
)
equation = TextElement(
label=cluster.label,
id=cluster.id,
cluster=cluster,
page_no=page.page_no,
text=text,
)
elements.append(equation)
body.append(equation)
# Unload backend page.assembled = AssembledUnit(
page._backend.unload() elements=elements, headers=headers, body=body
)
# Remove page images (can be disabled)
if not self.options.keep_images:
page._image_cache = {}
# Unload backend
page._backend.unload()
yield page yield page

View File

@ -1,10 +1,14 @@
from pathlib import Path
from typing import Iterable, Optional from typing import Iterable, Optional
from PIL import ImageDraw from PIL import ImageDraw
from pydantic import BaseModel from pydantic import BaseModel
from docling.datamodel.base_models import Page from docling.datamodel.base_models import Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.utils.profiling import TimeRecorder
class PagePreprocessingOptions(BaseModel): class PagePreprocessingOptions(BaseModel):
@ -15,14 +19,17 @@ class PagePreprocessingModel(BasePageModel):
def __init__(self, options: PagePreprocessingOptions): def __init__(self, options: PagePreprocessingOptions):
self.options = options self.options = options
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for page in page_batch: for page in page_batch:
assert page._backend is not None assert page._backend is not None
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
page = self._populate_page_images(page) with TimeRecorder(conv_res, "page_parse"):
page = self._parse_page_cells(page) page = self._populate_page_images(page)
page = self._parse_page_cells(conv_res, page)
yield page yield page
# Generate the page image and store it in the page object # Generate the page image and store it in the page object
@ -43,19 +50,30 @@ class PagePreprocessingModel(BasePageModel):
return page return page
# Extract and populate the page cells and store it in the page object # Extract and populate the page cells and store it in the page object
def _parse_page_cells(self, page: Page) -> Page: def _parse_page_cells(self, conv_res: ConversionResult, page: Page) -> Page:
assert page._backend is not None assert page._backend is not None
page.cells = list(page._backend.get_text_cells()) page.cells = list(page._backend.get_text_cells())
# DEBUG code: # DEBUG code:
def draw_text_boxes(image, cells): def draw_text_boxes(image, cells, show: bool = False):
draw = ImageDraw.Draw(image) draw = ImageDraw.Draw(image)
for c in cells: for c in cells:
x0, y0, x1, y1 = c.bbox.as_tuple() x0, y0, x1, y1 = c.bbox.as_tuple()
draw.rectangle([(x0, y0), (x1, y1)], outline="red") draw.rectangle([(x0, y0), (x1, y1)], outline="red")
image.show() if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
# draw_text_boxes(page.get_image(scale=1.0), cells) out_file = out_path / f"cells_page_{page.page_no:05}.png"
image.save(str(out_file), format="png")
if settings.debug.visualize_cells:
draw_text_boxes(page.get_image(scale=1.0), page.cells)
return page return page

View File

@ -1,6 +1,6 @@
import copy import copy
from pathlib import Path from pathlib import Path
from typing import Iterable, List from typing import Iterable
import numpy import numpy
from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell from docling_core.types.doc import BoundingBox, DocItemLabel, TableCell
@ -8,8 +8,11 @@ from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredic
from PIL import ImageDraw from PIL import ImageDraw
from docling.datamodel.base_models import Page, Table, TableStructurePrediction from docling.datamodel.base_models import Page, Table, TableStructurePrediction
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions from docling.datamodel.pipeline_options import TableFormerMode, TableStructureOptions
from docling.datamodel.settings import settings
from docling.models.base_model import BasePageModel from docling.models.base_model import BasePageModel
from docling.utils.profiling import TimeRecorder
class TableStructureModel(BasePageModel): class TableStructureModel(BasePageModel):
@ -35,7 +38,13 @@ class TableStructureModel(BasePageModel):
self.tf_predictor = TFPredictor(self.tm_config) self.tf_predictor = TFPredictor(self.tm_config)
self.scale = 2.0 # Scale up table input images to 144 dpi self.scale = 2.0 # Scale up table input images to 144 dpi
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]): def draw_table_and_cells(
self,
conv_res: ConversionResult,
page: Page,
tbl_list: Iterable[Table],
show: bool = False,
):
assert page._backend is not None assert page._backend is not None
image = ( image = (
@ -61,9 +70,21 @@ class TableStructureModel(BasePageModel):
fill="black", fill="black",
) )
image.show() if show:
image.show()
else:
out_path: Path = (
Path(settings.debug.debug_output_path)
/ f"debug_{conv_res.input.file.stem}"
)
out_path.mkdir(parents=True, exist_ok=True)
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: out_file = out_path / f"table_struct_page_{page.page_no:05}.png"
image.save(str(out_file), format="png")
def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled: if not self.enabled:
yield from page_batch yield from page_batch
@ -74,98 +95,112 @@ class TableStructureModel(BasePageModel):
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
with TimeRecorder(conv_res, "table_structure"):
assert page.predictions.layout is not None assert page.predictions.layout is not None
assert page.size is not None assert page.size is not None
page.predictions.tablestructure = TableStructurePrediction() # dummy page.predictions.tablestructure = (
TableStructurePrediction()
) # dummy
in_tables = [ in_tables = [
( (
cluster, cluster,
[ [
round(cluster.bbox.l) * self.scale, round(cluster.bbox.l) * self.scale,
round(cluster.bbox.t) * self.scale, round(cluster.bbox.t) * self.scale,
round(cluster.bbox.r) * self.scale, round(cluster.bbox.r) * self.scale,
round(cluster.bbox.b) * self.scale, round(cluster.bbox.b) * self.scale,
], ],
)
for cluster in page.predictions.layout.clusters
if cluster.label == DocItemLabel.TABLE
]
if not len(in_tables):
yield page
continue
tokens = []
for c in page.cells:
for cluster, _ in in_tables:
if c.bbox.area() > 0:
if (
c.bbox.intersection_area_with(cluster.bbox)
/ c.bbox.area()
> 0.2
):
# Only allow non-empty strings (i.e. not whitespace-only) into the cells of a table
if len(c.text.strip()) > 0:
new_cell = copy.deepcopy(c)
new_cell.bbox = new_cell.bbox.scaled(
scale=self.scale
)
tokens.append(new_cell.model_dump())
page_input = {
"tokens": tokens,
"width": page.size.width * self.scale,
"height": page.size.height * self.scale,
}
page_input["image"] = numpy.asarray(
page.get_image(scale=self.scale)
) )
for cluster in page.predictions.layout.clusters
if cluster.label == DocItemLabel.TABLE
]
if not len(in_tables):
yield page
continue
tokens = [] table_clusters, table_bboxes = zip(*in_tables)
for c in page.cells:
for cluster, _ in in_tables: if len(table_bboxes):
if c.bbox.area() > 0: tf_output = self.tf_predictor.multi_table_predict(
if ( page_input, table_bboxes, do_matching=self.do_cell_matching
c.bbox.intersection_area_with(cluster.bbox) )
/ c.bbox.area()
> 0.2 for table_cluster, table_out in zip(table_clusters, tf_output):
): table_cells = []
# Only allow non empty stings (spaces) into the cells of a table for element in table_out["tf_responses"]:
if len(c.text.strip()) > 0:
new_cell = copy.deepcopy(c) if not self.do_cell_matching:
new_cell.bbox = new_cell.bbox.scaled( the_bbox = BoundingBox.model_validate(
scale=self.scale element["bbox"]
).scaled(1 / self.scale)
text_piece = page._backend.get_text_in_rect(
the_bbox
) )
element["bbox"]["token"] = text_piece
tokens.append(new_cell.model_dump()) tc = TableCell.model_validate(element)
if self.do_cell_matching and tc.bbox is not None:
tc.bbox = tc.bbox.scaled(1 / self.scale)
table_cells.append(tc)
page_input = { # Retrieving cols/rows, after post processing:
"tokens": tokens, num_rows = table_out["predict_details"]["num_rows"]
"width": page.size.width * self.scale, num_cols = table_out["predict_details"]["num_cols"]
"height": page.size.height * self.scale, otsl_seq = table_out["predict_details"]["prediction"][
} "rs_seq"
page_input["image"] = numpy.asarray(page.get_image(scale=self.scale)) ]
table_clusters, table_bboxes = zip(*in_tables) tbl = Table(
otsl_seq=otsl_seq,
table_cells=table_cells,
num_rows=num_rows,
num_cols=num_cols,
id=table_cluster.id,
page_no=page.page_no,
cluster=table_cluster,
label=DocItemLabel.TABLE,
)
if len(table_bboxes): page.predictions.tablestructure.table_map[
tf_output = self.tf_predictor.multi_table_predict( table_cluster.id
page_input, table_bboxes, do_matching=self.do_cell_matching ] = tbl
)
for table_cluster, table_out in zip(table_clusters, tf_output):
table_cells = []
for element in table_out["tf_responses"]:
if not self.do_cell_matching:
the_bbox = BoundingBox.model_validate(
element["bbox"]
).scaled(1 / self.scale)
text_piece = page._backend.get_text_in_rect(the_bbox)
element["bbox"]["token"] = text_piece
tc = TableCell.model_validate(element)
if self.do_cell_matching and tc.bbox is not None:
tc.bbox = tc.bbox.scaled(1 / self.scale)
table_cells.append(tc)
# Retrieving cols/rows, after post processing:
num_rows = table_out["predict_details"]["num_rows"]
num_cols = table_out["predict_details"]["num_cols"]
otsl_seq = table_out["predict_details"]["prediction"]["rs_seq"]
tbl = Table(
otsl_seq=otsl_seq,
table_cells=table_cells,
num_rows=num_rows,
num_cols=num_cols,
id=table_cluster.id,
page_no=page.page_no,
cluster=table_cluster,
label=DocItemLabel.TABLE,
)
page.predictions.tablestructure.table_map[table_cluster.id] = (
tbl
)
# For debugging purposes: # For debugging purposes:
# self.draw_table_and_cells(page, page.predictions.tablestructure.table_map.values()) if settings.debug.visualize_tables:
self.draw_table_and_cells(
conv_res,
page,
page.predictions.tablestructure.table_map.values(),
)
yield page yield page

View File

@ -8,8 +8,11 @@ import pandas as pd
from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import TesseractCliOcrOptions from docling.datamodel.pipeline_options import TesseractCliOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -102,7 +105,9 @@ class TesseractOcrCliModel(BaseOcrModel):
return df_filtered return df_filtered
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled: if not self.enabled:
yield from page_batch yield from page_batch
@ -113,62 +118,67 @@ class TesseractOcrCliModel(BaseOcrModel):
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
ocr_rects = self.get_ocr_rects(page) with TimeRecorder(conv_res, "ocr"):
all_ocr_cells = [] ocr_rects = self.get_ocr_rects(page)
for ocr_rect in ocr_rects:
# Skip zero area boxes all_ocr_cells = []
if ocr_rect.area() == 0: for ocr_rect in ocr_rects:
continue # Skip zero area boxes
high_res_image = page._backend.get_page_image( if ocr_rect.area() == 0:
scale=self.scale, cropbox=ocr_rect continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
with tempfile.NamedTemporaryFile(
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
df = self._run_tesseract(fname)
# _log.info(df)
# Print relevant columns (bounding box and text)
for ix, row in df.iterrows():
text = row["text"]
conf = row["conf"]
l = float(row["left"])
b = float(row["top"])
w = float(row["width"])
h = float(row["height"])
t = b + h
r = l + w
cell = OcrCell(
id=ix,
text=text,
confidence=conf / 100.0,
bbox=BoundingBox.from_tuple(
coord=(
(l / self.scale) + ocr_rect.l,
(b / self.scale) + ocr_rect.t,
(r / self.scale) + ocr_rect.l,
(t / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
all_ocr_cells.append(cell)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
) )
with tempfile.NamedTemporaryFile( page.cells.extend(filtered_ocr_cells)
suffix=".png", mode="w"
) as image_file:
fname = image_file.name
high_res_image.save(fname)
df = self._run_tesseract(fname)
# _log.info(df)
# Print relevant columns (bounding box and text)
for ix, row in df.iterrows():
text = row["text"]
conf = row["conf"]
l = float(row["left"])
b = float(row["top"])
w = float(row["width"])
h = float(row["height"])
t = b + h
r = l + w
cell = OcrCell(
id=ix,
text=text,
confidence=conf / 100.0,
bbox=BoundingBox.from_tuple(
coord=(
(l / self.scale) + ocr_rect.l,
(b / self.scale) + ocr_rect.t,
(r / self.scale) + ocr_rect.l,
(t / self.scale) + ocr_rect.t,
),
origin=CoordOrigin.TOPLEFT,
),
)
all_ocr_cells.append(cell)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells)
page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page yield page

View File

@ -4,8 +4,11 @@ from typing import Iterable
from docling_core.types.doc import BoundingBox, CoordOrigin from docling_core.types.doc import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell, Page from docling.datamodel.base_models import OcrCell, Page
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import TesseractOcrOptions from docling.datamodel.pipeline_options import TesseractOcrOptions
from docling.datamodel.settings import settings
from docling.models.base_ocr_model import BaseOcrModel from docling.models.base_ocr_model import BaseOcrModel
from docling.utils.profiling import TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -61,7 +64,9 @@ class TesseractOcrModel(BaseOcrModel):
# Finalize the tesseractAPI # Finalize the tesseractAPI
self.reader.End() self.reader.End()
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: def __call__(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
if not self.enabled: if not self.enabled:
yield from page_batch yield from page_batch
@ -72,59 +77,66 @@ class TesseractOcrModel(BaseOcrModel):
if not page._backend.is_valid(): if not page._backend.is_valid():
yield page yield page
else: else:
assert self.reader is not None with TimeRecorder(conv_res, "ocr"):
ocr_rects = self.get_ocr_rects(page) assert self.reader is not None
all_ocr_cells = [] ocr_rects = self.get_ocr_rects(page)
for ocr_rect in ocr_rects:
# Skip zero area boxes
if ocr_rect.area() == 0:
continue
high_res_image = page._backend.get_page_image(
scale=self.scale, cropbox=ocr_rect
)
# Retrieve text snippets with their bounding boxes all_ocr_cells = []
self.reader.SetImage(high_res_image) for ocr_rect in ocr_rects:
boxes = self.reader.GetComponentImages( # Skip zero area boxes
self.reader_RIL.TEXTLINE, True if ocr_rect.area() == 0:
) continue
high_res_image = page._backend.get_page_image(
cells = [] scale=self.scale, cropbox=ocr_rect
for ix, (im, box, _, _) in enumerate(boxes):
# Set the area of interest. Tesseract uses Bottom-Left for the origin
self.reader.SetRectangle(box["x"], box["y"], box["w"], box["h"])
# Extract text within the bounding box
text = self.reader.GetUTF8Text().strip()
confidence = self.reader.MeanTextConf()
left = box["x"] / self.scale
bottom = box["y"] / self.scale
right = (box["x"] + box["w"]) / self.scale
top = (box["y"] + box["h"]) / self.scale
cells.append(
OcrCell(
id=ix,
text=text,
confidence=confidence,
bbox=BoundingBox.from_tuple(
coord=(left, top, right, bottom),
origin=CoordOrigin.TOPLEFT,
),
)
) )
# del high_res_image # Retrieve text snippets with their bounding boxes
all_ocr_cells.extend(cells) self.reader.SetImage(high_res_image)
boxes = self.reader.GetComponentImages(
self.reader_RIL.TEXTLINE, True
)
## Remove OCR cells which overlap with programmatic cells. cells = []
filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells) for ix, (im, box, _, _) in enumerate(boxes):
# Set the area of interest. Tesseract uses Bottom-Left for the origin
self.reader.SetRectangle(
box["x"], box["y"], box["w"], box["h"]
)
page.cells.extend(filtered_ocr_cells) # Extract text within the bounding box
text = self.reader.GetUTF8Text().strip()
confidence = self.reader.MeanTextConf()
left = box["x"] / self.scale
bottom = box["y"] / self.scale
right = (box["x"] + box["w"]) / self.scale
top = (box["y"] + box["h"]) / self.scale
cells.append(
OcrCell(
id=ix,
text=text,
confidence=confidence,
bbox=BoundingBox.from_tuple(
coord=(left, top, right, bottom),
origin=CoordOrigin.TOPLEFT,
),
)
)
# del high_res_image
all_ocr_cells.extend(cells)
## Remove OCR cells which overlap with programmatic cells.
filtered_ocr_cells = self.filter_ocr_cells(
all_ocr_cells, page.cells
)
page.cells.extend(filtered_ocr_cells)
# DEBUG code: # DEBUG code:
# self.draw_ocr_rects_and_cells(page, ocr_rects) if settings.debug.visualize_ocr:
self.draw_ocr_rects_and_cells(conv_res, page, ocr_rects)
yield page yield page

View File

@ -19,6 +19,7 @@ from docling.datamodel.document import ConversionResult, InputDocument
from docling.datamodel.pipeline_options import PipelineOptions from docling.datamodel.pipeline_options import PipelineOptions
from docling.datamodel.settings import settings from docling.datamodel.settings import settings
from docling.models.base_model import BaseEnrichmentModel from docling.models.base_model import BaseEnrichmentModel
from docling.utils.profiling import ProfilingScope, TimeRecorder
from docling.utils.utils import chunkify from docling.utils.utils import chunkify
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -35,13 +36,16 @@ class BasePipeline(ABC):
_log.info(f"Processing document {in_doc.file.name}") _log.info(f"Processing document {in_doc.file.name}")
try: try:
# These steps are building and assembling the structure of the with TimeRecorder(
# output DoclingDocument conv_res, "pipeline_total", scope=ProfilingScope.DOCUMENT
conv_res = self._build_document(in_doc, conv_res) ):
conv_res = self._assemble_document(in_doc, conv_res) # These steps are building and assembling the structure of the
# From this stage, all operations should rely only on conv_res.output # output DoclingDocument
conv_res = self._enrich_document(in_doc, conv_res) conv_res = self._build_document(conv_res)
conv_res.status = self._determine_status(in_doc, conv_res) conv_res = self._assemble_document(conv_res)
# From this stage, all operations should rely only on conv_res.output
conv_res = self._enrich_document(conv_res)
conv_res.status = self._determine_status(conv_res)
except Exception as e: except Exception as e:
conv_res.status = ConversionStatus.FAILURE conv_res.status = ConversionStatus.FAILURE
if raises_on_error: if raises_on_error:
@ -50,19 +54,13 @@ class BasePipeline(ABC):
return conv_res return conv_res
@abstractmethod @abstractmethod
def _build_document( def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
pass pass
def _assemble_document( def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
return conv_res return conv_res
def _enrich_document( def _enrich_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
def _filter_elements( def _filter_elements(
doc: DoclingDocument, model: BaseEnrichmentModel doc: DoclingDocument, model: BaseEnrichmentModel
@ -71,24 +69,23 @@ class BasePipeline(ABC):
if model.is_processable(doc=doc, element=element): if model.is_processable(doc=doc, element=element):
yield element yield element
for model in self.enrichment_pipe: with TimeRecorder(conv_res, "doc_enrich", scope=ProfilingScope.DOCUMENT):
for element_batch in chunkify( for model in self.enrichment_pipe:
_filter_elements(conv_res.document, model), for element_batch in chunkify(
settings.perf.elements_batch_size, _filter_elements(conv_res.document, model),
): settings.perf.elements_batch_size,
# TODO: currently we assume the element itself is modified, because ):
# we don't have an interface to save the element back to the document # TODO: currently we assume the element itself is modified, because
for element in model( # we don't have an interface to save the element back to the document
doc=conv_res.document, element_batch=element_batch for element in model(
): # Must exhaust! doc=conv_res.document, element_batch=element_batch
pass ): # Must exhaust!
pass
return conv_res return conv_res
@abstractmethod @abstractmethod
def _determine_status( def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionStatus:
pass pass
@classmethod @classmethod
@ -110,66 +107,68 @@ class BasePipeline(ABC):
class PaginatedPipeline(BasePipeline): # TODO this is a bad name. class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
def _apply_on_pages(self, page_batch: Iterable[Page]) -> Iterable[Page]: def _apply_on_pages(
self, conv_res: ConversionResult, page_batch: Iterable[Page]
) -> Iterable[Page]:
for model in self.build_pipe: for model in self.build_pipe:
page_batch = model(page_batch) page_batch = model(conv_res, page_batch)
yield from page_batch yield from page_batch
def _build_document( def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
if not isinstance(in_doc._backend, PdfDocumentBackend): if not isinstance(conv_res.input._backend, PdfDocumentBackend):
raise RuntimeError( raise RuntimeError(
f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a PDF backend. " f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a PDF backend. "
f"Can not convert this with a PDF pipeline. " f"Can not convert this with a PDF pipeline. "
f"Please check your format configuration on DocumentConverter." f"Please check your format configuration on DocumentConverter."
) )
# conv_res.status = ConversionStatus.FAILURE # conv_res.status = ConversionStatus.FAILURE
# return conv_res # return conv_res
for i in range(0, in_doc.page_count): with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
conv_res.pages.append(Page(page_no=i))
try: for i in range(0, conv_res.input.page_count):
# Iterate batches of pages (page_batch_size) in the doc conv_res.pages.append(Page(page_no=i))
for page_batch in chunkify(conv_res.pages, settings.perf.page_batch_size):
start_pb_time = time.time()
# 1. Initialise the page resources try:
init_pages = map( # Iterate batches of pages (page_batch_size) in the doc
functools.partial(self.initialize_page, in_doc), page_batch for page_batch in chunkify(
conv_res.pages, settings.perf.page_batch_size
):
start_pb_time = time.time()
# 1. Initialise the page resources
init_pages = map(
functools.partial(self.initialize_page, conv_res), page_batch
)
# 2. Run pipeline stages
pipeline_pages = self._apply_on_pages(conv_res, init_pages)
for p in pipeline_pages: # Must exhaust!
pass
end_pb_time = time.time() - start_pb_time
_log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
except Exception as e:
conv_res.status = ConversionStatus.FAILURE
trace = "\n".join(traceback.format_exception(e))
_log.warning(
f"Encountered an error during conversion of document {conv_res.input.document_hash}:\n"
f"{trace}"
) )
raise e
# 2. Run pipeline stages finally:
pipeline_pages = self._apply_on_pages(init_pages) # Always unload the PDF backend, even in case of failure
if conv_res.input._backend:
for p in pipeline_pages: # Must exhaust! conv_res.input._backend.unload()
pass
end_pb_time = time.time() - start_pb_time
_log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
except Exception as e:
conv_res.status = ConversionStatus.FAILURE
trace = "\n".join(traceback.format_exception(e))
_log.warning(
f"Encountered an error during conversion of document {in_doc.document_hash}:\n"
f"{trace}"
)
raise e
finally:
# Always unload the PDF backend, even in case of failure
if in_doc._backend:
in_doc._backend.unload()
return conv_res return conv_res
def _determine_status( def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionStatus:
status = ConversionStatus.SUCCESS status = ConversionStatus.SUCCESS
for page in conv_res.pages: for page in conv_res.pages:
if page._backend is None or not page._backend.is_valid(): if page._backend is None or not page._backend.is_valid():
@ -186,5 +185,5 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
# Initialise and load resources for a page # Initialise and load resources for a page
@abstractmethod @abstractmethod
def initialize_page(self, doc: InputDocument, page: Page) -> Page: def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
pass pass

View File

@ -5,9 +5,10 @@ from docling.backend.abstract_backend import (
DeclarativeDocumentBackend, DeclarativeDocumentBackend,
) )
from docling.datamodel.base_models import ConversionStatus from docling.datamodel.base_models import ConversionStatus
from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import PipelineOptions from docling.datamodel.pipeline_options import PipelineOptions
from docling.pipeline.base_pipeline import BasePipeline from docling.pipeline.base_pipeline import BasePipeline
from docling.utils.profiling import ProfilingScope, TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -22,13 +23,11 @@ class SimplePipeline(BasePipeline):
def __init__(self, pipeline_options: PipelineOptions): def __init__(self, pipeline_options: PipelineOptions):
super().__init__(pipeline_options) super().__init__(pipeline_options)
def _build_document( def _build_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
if not isinstance(in_doc._backend, DeclarativeDocumentBackend): if not isinstance(conv_res.input._backend, DeclarativeDocumentBackend):
raise RuntimeError( raise RuntimeError(
f"The selected backend {type(in_doc._backend).__name__} for {in_doc.file} is not a declarative backend. " f"The selected backend {type(conv_res.input._backend).__name__} for {conv_res.input.file} is not a declarative backend. "
f"Can not convert this with simple pipeline. " f"Can not convert this with simple pipeline. "
f"Please check your format configuration on DocumentConverter." f"Please check your format configuration on DocumentConverter."
) )
@ -38,13 +37,11 @@ class SimplePipeline(BasePipeline):
# Instead of running a page-level pipeline to build up the document structure, # Instead of running a page-level pipeline to build up the document structure,
# the backend is expected to be of type DeclarativeDocumentBackend, which can output # the backend is expected to be of type DeclarativeDocumentBackend, which can output
# a DoclingDocument straight. # a DoclingDocument straight.
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
conv_res.document = in_doc._backend.convert() conv_res.document = conv_res.input._backend.convert()
return conv_res return conv_res
def _determine_status( def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionStatus:
# This is called only if the previous steps didn't raise. # This is called only if the previous steps didn't raise.
# Since we don't have anything else to evaluate, we can # Since we don't have anything else to evaluate, we can
# safely return SUCCESS. # safely return SUCCESS.

View File

@ -7,7 +7,7 @@ from docling_core.types.doc import DocItem, ImageRef, PictureItem, TableItem
from docling.backend.abstract_backend import AbstractDocumentBackend from docling.backend.abstract_backend import AbstractDocumentBackend
from docling.backend.pdf_backend import PdfDocumentBackend from docling.backend.pdf_backend import PdfDocumentBackend
from docling.datamodel.base_models import AssembledUnit, Page from docling.datamodel.base_models import AssembledUnit, Page
from docling.datamodel.document import ConversionResult, InputDocument from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import ( from docling.datamodel.pipeline_options import (
EasyOcrOptions, EasyOcrOptions,
PdfPipelineOptions, PdfPipelineOptions,
@ -27,6 +27,7 @@ from docling.models.table_structure_model import TableStructureModel
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
from docling.models.tesseract_ocr_model import TesseractOcrModel from docling.models.tesseract_ocr_model import TesseractOcrModel
from docling.pipeline.base_pipeline import PaginatedPipeline from docling.pipeline.base_pipeline import PaginatedPipeline
from docling.utils.profiling import ProfilingScope, TimeRecorder
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -119,73 +120,75 @@ class StandardPdfPipeline(PaginatedPipeline):
) )
return None return None
def initialize_page(self, doc: InputDocument, page: Page) -> Page: def initialize_page(self, conv_res: ConversionResult, page: Page) -> Page:
page._backend = doc._backend.load_page(page.page_no) # type: ignore with TimeRecorder(conv_res, "page_init"):
if page._backend is not None and page._backend.is_valid(): page._backend = conv_res.input._backend.load_page(page.page_no) # type: ignore
page.size = page._backend.get_size() if page._backend is not None and page._backend.is_valid():
page.size = page._backend.get_size()
return page return page
def _assemble_document( def _assemble_document(self, conv_res: ConversionResult) -> ConversionResult:
self, in_doc: InputDocument, conv_res: ConversionResult
) -> ConversionResult:
all_elements = [] all_elements = []
all_headers = [] all_headers = []
all_body = [] all_body = []
for p in conv_res.pages: with TimeRecorder(conv_res, "doc_assemble", scope=ProfilingScope.DOCUMENT):
if p.assembled is not None: for p in conv_res.pages:
for el in p.assembled.body: if p.assembled is not None:
all_body.append(el) for el in p.assembled.body:
for el in p.assembled.headers: all_body.append(el)
all_headers.append(el) for el in p.assembled.headers:
for el in p.assembled.elements: all_headers.append(el)
all_elements.append(el) for el in p.assembled.elements:
all_elements.append(el)
conv_res.assembled = AssembledUnit( conv_res.assembled = AssembledUnit(
elements=all_elements, headers=all_headers, body=all_body elements=all_elements, headers=all_headers, body=all_body
) )
conv_res.document = self.glm_model(conv_res) conv_res.document = self.glm_model(conv_res)
# Generate page images in the output # Generate page images in the output
if self.pipeline_options.generate_page_images: if self.pipeline_options.generate_page_images:
for page in conv_res.pages: for page in conv_res.pages:
assert page.image is not None
page_no = page.page_no + 1
conv_res.document.pages[page_no].image = ImageRef.from_pil(
page.image, dpi=int(72 * self.pipeline_options.images_scale)
)
# Generate images of the requested element types
if (
self.pipeline_options.generate_picture_images
or self.pipeline_options.generate_table_images
):
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if not isinstance(element, DocItem) or len(element.prov) == 0:
continue
if (
isinstance(element, PictureItem)
and self.pipeline_options.generate_picture_images
) or (
isinstance(element, TableItem)
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
assert page.size is not None
assert page.image is not None assert page.image is not None
page_no = page.page_no + 1
crop_bbox = ( conv_res.document.pages[page_no].image = ImageRef.from_pil(
element.prov[0] page.image, dpi=int(72 * self.pipeline_options.images_scale)
.bbox.scaled(scale=scale)
.to_top_left_origin(page_height=page.size.height * scale)
) )
cropped_im = page.image.crop(crop_bbox.as_tuple()) # Generate images of the requested element types
element.image = ImageRef.from_pil(cropped_im, dpi=int(72 * scale)) if (
self.pipeline_options.generate_picture_images
or self.pipeline_options.generate_table_images
):
scale = self.pipeline_options.images_scale
for element, _level in conv_res.document.iterate_items():
if not isinstance(element, DocItem) or len(element.prov) == 0:
continue
if (
isinstance(element, PictureItem)
and self.pipeline_options.generate_picture_images
) or (
isinstance(element, TableItem)
and self.pipeline_options.generate_table_images
):
page_ix = element.prov[0].page_no - 1
page = conv_res.pages[page_ix]
assert page.size is not None
assert page.image is not None
crop_bbox = (
element.prov[0]
.bbox.scaled(scale=scale)
.to_top_left_origin(page_height=page.size.height * scale)
)
cropped_im = page.image.crop(crop_bbox.as_tuple())
element.image = ImageRef.from_pil(
cropped_im, dpi=int(72 * scale)
)
return conv_res return conv_res

View File

@ -0,0 +1,62 @@
import time
from datetime import datetime
from enum import Enum
from typing import TYPE_CHECKING, List
import numpy as np
from pydantic import BaseModel
from docling.datamodel.settings import settings
if TYPE_CHECKING:
from docling.datamodel.document import ConversionResult
class ProfilingScope(str, Enum):
    """Granularity label attached to a profiling entry.

    The ``str`` mixin makes each member an actual string, so a member
    compares equal to its plain value (``ProfilingScope.PAGE == "page"``).
    """

    # Default scope of a TimeRecorder when the caller does not pass one.
    PAGE = "page"
    # Passed explicitly by the pipelines for whole-document stages.
    DOCUMENT = "document"
class ProfilingItem(BaseModel):
    """Timing samples collected under one profiling key.

    Instances are created and filled by ``TimeRecorder``; the methods below
    are thin wrappers around the corresponding NumPy reductions of ``times``.
    """

    # Scope the key was first registered with.
    scope: ProfilingScope
    # Incremented by TimeRecorder once per completed ``with`` block.
    count: int = 0
    # Elapsed durations, measured with time.monotonic() (i.e. seconds).
    # NOTE(review): the mutable ``[]`` defaults here and below presumably rely
    # on pydantic copying field defaults per instance -- confirm.
    times: List[float] = []
    # Start time of each measurement, appended when the ``with`` block is entered.
    start_timestamps: List[datetime] = []

    def avg(self) -> float:
        """Return ``np.average`` of the recorded times."""
        return np.average(self.times)  # type: ignore

    def std(self) -> float:
        """Return ``np.std`` (standard deviation) of the recorded times."""
        return np.std(self.times)  # type: ignore

    def mean(self) -> float:
        """Return ``np.mean`` of the recorded times."""
        return np.mean(self.times)  # type: ignore

    def percentile(self, perc: float) -> float:
        """Return the ``perc``-th percentile of the recorded times.

        ``perc`` is forwarded unchanged as the ``q`` argument of
        ``np.percentile``.
        """
        return np.percentile(self.times, perc)  # type: ignore
class TimeRecorder:
    """Context manager that records how long its ``with`` body takes.

    Every completed block appends one elapsed-time sample (measured with
    ``time.monotonic``) and one start timestamp to ``conv_res.timings[key]``
    and bumps its ``count``. Nothing is recorded unless
    ``settings.debug.profile_pipeline_timings`` is truthy.

    The setting is read exactly once, at construction. A recorder therefore
    behaves consistently for its whole lifetime even if the setting is
    flipped while the timed block is running (re-reading it in each method
    could hit attributes that were never set).
    """

    def __init__(
        self,
        conv_res: "ConversionResult",
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        """Prepare a recorder for ``key`` on ``conv_res``.

        Args:
            conv_res: Conversion result whose ``timings`` mapping receives
                the samples.
            key: Name of the timing entry to append to.
            scope: Scope stored on the entry when ``key`` is registered for
                the first time; ignored if the entry already exists.
        """
        # Snapshot the flag so __enter__/__exit__ cannot disagree with __init__.
        self._enabled = bool(settings.debug.profile_pipeline_timings)
        # Always stored, so the instance is well-formed even when disabled.
        self.conv_res = conv_res
        self.key = key
        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)

    def __enter__(self):
        """Start the measurement (if enabled) and return the recorder."""
        if self._enabled:
            # Local import keeps this block self-contained; it is resolved
            # before the clock starts so it does not count towards the sample.
            from datetime import timezone

            self.start = time.monotonic()
            # Same naive-UTC value datetime.utcnow() produced, without using
            # the API deprecated since Python 3.12.
            started_at = datetime.now(timezone.utc).replace(tzinfo=None)
            self.conv_res.timings[self.key].start_timestamps.append(started_at)
        return self

    def __exit__(self, *args):
        """Store the elapsed time (if enabled).

        The sample is recorded whether or not the block raised; the implicit
        ``None`` return means exceptions are never suppressed.
        """
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1

View File

@ -8,6 +8,7 @@ import yaml
from docling.datamodel.base_models import ConversionStatus from docling.datamodel.base_models import ConversionStatus
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult
from docling.datamodel.settings import settings
from docling.document_converter import DocumentConverter from docling.document_converter import DocumentConverter
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@ -113,6 +114,12 @@ def main():
# docs = [DocumentStream(name="my_doc.pdf", stream=buf)] # docs = [DocumentStream(name="my_doc.pdf", stream=buf)]
# input = DocumentConversionInput.from_streams(docs) # input = DocumentConversionInput.from_streams(docs)
# # Turn on inline debug visualizations:
# settings.debug.visualize_layout = True
# settings.debug.visualize_ocr = True
# settings.debug.visualize_tables = True
# settings.debug.visualize_cells = True
doc_converter = DocumentConverter() doc_converter = DocumentConverter()
start_time = time.time() start_time = time.time()

420
poetry.lock generated
View File

@ -913,13 +913,13 @@ tabulate = ">=0.9.0,<0.10.0"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
version = "2.0.1" version = "2.0.2"
description = "This package contains the AI models used by the Docling PDF conversion package" description = "This package contains the AI models used by the Docling PDF conversion package"
optional = false optional = false
python-versions = "<4.0,>=3.10" python-versions = "<4.0,>=3.10"
files = [ files = [
{file = "docling_ibm_models-2.0.1-py3-none-any.whl", hash = "sha256:f81c6002b7e102aa79afb8287fce48872f27d1cffb088ea4d1fbebe490364a1d"}, {file = "docling_ibm_models-2.0.2-py3-none-any.whl", hash = "sha256:dd27889838319d55a45704f80eb1e75ccfe98de907e5d53f7815ef50402dffe7"},
{file = "docling_ibm_models-2.0.1.tar.gz", hash = "sha256:4fb0300022cfa0d0ac1fcbcb296c144e71ee9816654407f8a4d3a7b934f3065f"}, {file = "docling_ibm_models-2.0.2.tar.gz", hash = "sha256:5c8b7030faa171558fa83fabd3d1bade729e0319265ad776ed78b89aefbb1982"},
] ]
[package.dependencies] [package.dependencies]
@ -945,41 +945,41 @@ tqdm = ">=4.64.0,<5.0.0"
[[package]] [[package]]
name = "docling-parse" name = "docling-parse"
version = "2.0.0" version = "2.0.1"
description = "Simple package to extract text with coordinates from programmatic PDFs" description = "Simple package to extract text with coordinates from programmatic PDFs"
optional = false optional = false
python-versions = "<4.0,>=3.9" python-versions = "<4.0,>=3.9"
files = [ files = [
{file = "docling_parse-2.0.0-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:269a0ba847e2faf7aa72a31861141f09ffa9a347e0b16810f45eba8f9104d2ca"}, {file = "docling_parse-2.0.1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:dab77f27ad8327a9350ca69792d18e0b4297877cc64917b46c2bb7b6e8c6ea31"},
{file = "docling_parse-2.0.0-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:1bf48f8042345ec94d2de3a375711fcd7ed55356e3f60f220fc5df09fe9031b0"}, {file = "docling_parse-2.0.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:ae7caa659cd6eae718668f690086949046e366042b23d6a736e5f1dc2a5d9afc"},
{file = "docling_parse-2.0.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:5e4da33ce3857c7c83628b9dbd136932b81ad7718e7edf2fe8e44c3469ee0b9a"}, {file = "docling_parse-2.0.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ffb500b5113129dfd606399b1d09a61b2fbec62596b77c20044a05ea599f05aa"},
{file = "docling_parse-2.0.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:011b4a847c71e1a1ac362dfe8c7951997fc4eef47eb29b43e7f1c13fcac983d8"}, {file = "docling_parse-2.0.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:0e3f30eba548a37fb821db29a8c7e9dfb99b6262194bb305bee2f833d65c3134"},
{file = "docling_parse-2.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e77b8f731a614c6da3beb4c59eb878c1668a6956f5de06f42a3d4f502cde46e4"}, {file = "docling_parse-2.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:039d775fe017dfed5856e08a3db15dece5cb00e164c1529f37aca57e90fb5fc6"},
{file = "docling_parse-2.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:696d9a93e8879026ae56cb677b355778e09c146cd07c0a3f6f99a3fdc5846421"}, {file = "docling_parse-2.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c6db928e1ac317db77bec87f76e47023b2fd4aac4f81babef00cf30bd526618"},
{file = "docling_parse-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:0a8b10fed8022e9343c1be9ea9e82bcc77a5e32042a585cb816db9c5f51b906f"}, {file = "docling_parse-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:1d94e5d7146bfc5f0cb767000f0b0bc4837df9075d046df46ef3802b08c12f2e"},
{file = "docling_parse-2.0.0-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:8b9f8e91656bf5c8c1ea99eb29b8b41cd1cd156e33d1fec530add4ee30705da9"}, {file = "docling_parse-2.0.1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:033129a2535408104867682ea3e2cc9cb2690790075f30365052789ab71a017b"},
{file = "docling_parse-2.0.0-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:bceb96fbbd286d71ba12bf876bc08f772a526a45e8e2dda8567b171c7a5a6c02"}, {file = "docling_parse-2.0.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:a0e16fca70848195487615fc61a04f9afdbfec34a672e92ce8cb6b8a54daca55"},
{file = "docling_parse-2.0.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:729c18b42e316b3cc64af9feb8fcb020e7dabe4d8d45ded424c2508b5284631b"}, {file = "docling_parse-2.0.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:acb83350f4f5010ca18a3a891c7751132abeb1e727e52f9b44a16bf8f65a6d3a"},
{file = "docling_parse-2.0.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:ac2725cdbc3f7ef5ea5351f6b7e78e93c3b271a72f827d42210fbe9cb46d9644"}, {file = "docling_parse-2.0.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:2286ec6d1dea6d0fc259fe4e375c79ace4499a6cce0b4341bdd689d54744efc4"},
{file = "docling_parse-2.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:965a7ffd5a1b83006ab47ca76be02725439e9dd80aaadf7f3258b81c5cd6ad22"}, {file = "docling_parse-2.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c83117642d2494a4baa92224f5d6de0fe37beb235ff4e48642786a160c5d3f8"},
{file = "docling_parse-2.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb1116f179dc2d8f13d2870dd6b0dfa443eea2994c0a75e4b7e8cae7d9f969b3"}, {file = "docling_parse-2.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:61d8808d9b50104c982bce8390900e4060ef3fa829e9449ac1bdbc0f0be61091"},
{file = "docling_parse-2.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:6a675b4aced7abc0f4b0df25af1d80b93feba55159b830b6c2b4eafb1719d389"}, {file = "docling_parse-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:07d918a5f98e5a78f0e9d08267242d81c35a788f21403d7c2bb4a862b3073c97"},
{file = "docling_parse-2.0.0-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:f3677b2ce7bfa1119573aa8ffbf7ece6d46ed1af55094c60d1a1f8fd8477dbe8"}, {file = "docling_parse-2.0.1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:1128f330cb56ba8bd58eed7221c92ef54b544c077dc2f04b0b2bbabe2a53e688"},
{file = "docling_parse-2.0.0-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:ff94ad7c2f3404904b05338e2efa5b7359078992bd293b046fceed5d98f653b0"}, {file = "docling_parse-2.0.1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:4a868f9057403c7dac57f8c086aa51d976500c191f21ff54d8923d1144714baa"},
{file = "docling_parse-2.0.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:fd280cbfef2040f8016ae010fe06be7b610b7ff3cba2014aea9193481c938f6e"}, {file = "docling_parse-2.0.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:1681679041cbda708e4961f3ce4ff9acea1eeadc203a94874a860104acc2b446"},
{file = "docling_parse-2.0.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:e5f4274fe619c4adf3d2daffca1ae14000c2498c006c971bbf77f250f6e9289d"}, {file = "docling_parse-2.0.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:747638746e4a81c74fc067a1b4efb9b35f8a223f7dee02c180cf6ceb80dc128c"},
{file = "docling_parse-2.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c612320c22353e93a0c74a59cd220bdd253e5cd18d281ea918f3200e92b6b29b"}, {file = "docling_parse-2.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d9938c58a6121be456334dbf116af8fdee00c1a257c983a3236b6dfc6e37cbf"},
{file = "docling_parse-2.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05aaa4fbcccfafd3c026b802c60a2968e4fe163f9f9cd1ed95823d4a5f93099c"}, {file = "docling_parse-2.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ae727fef7e107b5136f8182eb08e456881f7a7cd2a5635991c9e62b8cd0e8cc"},
{file = "docling_parse-2.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:2bad0b05512731c3c0a6b3057d7a8c407b219aafe6360644c3d64c9d0b9b1bac"}, {file = "docling_parse-2.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:3b3553d2a781528b7674c4b383b6be92b5ac8c68d657d51671a10c50ac370b45"},
{file = "docling_parse-2.0.0-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:404633fe50f2537486479243b12b279dc11929cb4307700edefc882d852e91bc"}, {file = "docling_parse-2.0.1-cp313-cp313-macosx_13_0_arm64.whl", hash = "sha256:f9af29a48f9523a7b121b0350eb84a523ae961fc569dc7fcd2d5ed484bcf278d"},
{file = "docling_parse-2.0.0-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:75f23e8cbbfc80f79974b79d613cf86e67b7adb39fac713f2192ffdcce5d3bc5"}, {file = "docling_parse-2.0.1-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:76d895091553dc56fda99ff6019140819bf4a3bffb3c655b0f851e86a62b1775"},
{file = "docling_parse-2.0.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:247cc5044d6e9d2adc4d2ffd65f4db3776d3388dc76675f4c44b8c0e4063c85d"}, {file = "docling_parse-2.0.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:172605fad8ec32c4c5480152218d5b3085fffab17b958a3a44302ff5466ed0bd"},
{file = "docling_parse-2.0.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:42a2429bcad03a9e975af4e6390fc012eafc8b1adee93713d73c6dc374a38de3"}, {file = "docling_parse-2.0.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:5a900a076797754babe64369a7b49438639898b4cf3ce438ab1f0e9ba2fa78de"},
{file = "docling_parse-2.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b094164f0cf8dffad7ce9a220f8337e9cbc7cea1470f763b836c5cbd22703cec"}, {file = "docling_parse-2.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb64c9e353893181b7c9177b954ec2bb8925c62f5ec362934fc699bd180e171"},
{file = "docling_parse-2.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7628fc03ce9ea9424a5c4304059e1377b17f1a6a6ccffcd87b3a1a16776a36f1"}, {file = "docling_parse-2.0.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:706f39b1f417a79b13e539fd25e10ff8f2ac603ad61fa7f05586f4e067e84a18"},
{file = "docling_parse-2.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:6d805607b072d1facac69c277be52a36f71bdb6d0b2a748e8bbc3453f652ce1f"}, {file = "docling_parse-2.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e8f5809a94373d1b20f834d5916471d36e6d076e49b151f2a1196719505c3e8e"},
{file = "docling_parse-2.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f6948878bc4930b5d344a3b8ceb7f2550513b1888cf3c157ef466096fe848dd1"}, {file = "docling_parse-2.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a56e960cc915fb67fb97210d069dbf0cb195f4a12518edc182552fe8aaf103bd"},
{file = "docling_parse-2.0.0.tar.gz", hash = "sha256:6509f4776442593e7d2d9433375618edcac619a2a992ebaaed731be7ed6d42b7"}, {file = "docling_parse-2.0.1.tar.gz", hash = "sha256:27c3c1f22de2afede928ed1d139d0378faf31c55b3416dfe9be01e879a18072e"},
] ]
[package.dependencies] [package.dependencies]
@ -1363,70 +1363,70 @@ test = ["coverage[toml]", "ddt (>=1.1.1,!=1.4.3)", "mock", "mypy", "pre-commit",
[[package]] [[package]]
name = "grpcio" name = "grpcio"
version = "1.67.0" version = "1.67.1"
description = "HTTP/2-based RPC framework" description = "HTTP/2-based RPC framework"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "grpcio-1.67.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:bd79929b3bb96b54df1296cd3bf4d2b770bd1df6c2bdf549b49bab286b925cdc"}, {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
{file = "grpcio-1.67.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:16724ffc956ea42967f5758c2f043faef43cb7e48a51948ab593570570d1e68b"}, {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"},
{file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:2b7183c80b602b0ad816315d66f2fb7887614ead950416d60913a9a71c12560d"}, {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:43112046864317498a33bdc4797ae6a268c36345a910de9b9c17159d8346602f"},
{file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:efe32b45dd6d118f5ea2e5deaed417d8a14976325c93812dd831908522b402c9"}, {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9b929f13677b10f63124c1a410994a401cdd85214ad83ab67cc077fc7e480f0"},
{file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe89295219b9c9e47780a0f1c75ca44211e706d1c598242249fe717af3385ec8"}, {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d1797a8a3845437d327145959a2c0c47c05947c9eef5ff1a4c80e499dcc6fa"},
{file = "grpcio-1.67.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa8d025fae1595a207b4e47c2e087cb88d47008494db258ac561c00877d4c8f8"}, {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0489063974d1452436139501bf6b180f63d4977223ee87488fe36858c5725292"},
{file = "grpcio-1.67.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f95e15db43e75a534420e04822df91f645664bf4ad21dfaad7d51773c80e6bb4"}, {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9fd042de4a82e3e7aca44008ee2fb5da01b3e5adb316348c21980f7f58adc311"},
{file = "grpcio-1.67.0-cp310-cp310-win32.whl", hash = "sha256:a6b9a5c18863fd4b6624a42e2712103fb0f57799a3b29651c0e5b8119a519d65"}, {file = "grpcio-1.67.1-cp310-cp310-win32.whl", hash = "sha256:638354e698fd0c6c76b04540a850bf1db27b4d2515a19fcd5cf645c48d3eb1ed"},
{file = "grpcio-1.67.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6eb68493a05d38b426604e1dc93bfc0137c4157f7ab4fac5771fd9a104bbaa6"}, {file = "grpcio-1.67.1-cp310-cp310-win_amd64.whl", hash = "sha256:608d87d1bdabf9e2868b12338cd38a79969eaf920c89d698ead08f48de9c0f9e"},
{file = "grpcio-1.67.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:e91d154689639932305b6ea6f45c6e46bb51ecc8ea77c10ef25aa77f75443ad4"}, {file = "grpcio-1.67.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:7818c0454027ae3384235a65210bbf5464bd715450e30a3d40385453a85a70cb"},
{file = "grpcio-1.67.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cb204a742997277da678611a809a8409657b1398aaeebf73b3d9563b7d154c13"}, {file = "grpcio-1.67.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ea33986b70f83844cd00814cee4451055cd8cab36f00ac64a31f5bb09b31919e"},
{file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:ae6de510f670137e755eb2a74b04d1041e7210af2444103c8c95f193340d17ee"}, {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:c7a01337407dd89005527623a4a72c5c8e2894d22bead0895306b23c6695698f"},
{file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74b900566bdf68241118f2918d312d3bf554b2ce0b12b90178091ea7d0a17b3d"}, {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80b866f73224b0634f4312a4674c1be21b2b4afa73cb20953cbbb73a6b36c3cc"},
{file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4e95e43447a02aa603abcc6b5e727d093d161a869c83b073f50b9390ecf0fa8"}, {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fff78ba10d4250bfc07a01bd6254a6d87dc67f9627adece85c0b2ed754fa96"},
{file = "grpcio-1.67.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0bb94e66cd8f0baf29bd3184b6aa09aeb1a660f9ec3d85da615c5003154bc2bf"}, {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a23cbcc5bb11ea7dc6163078be36c065db68d915c24f5faa4f872c573bb400f"},
{file = "grpcio-1.67.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:82e5bd4b67b17c8c597273663794a6a46a45e44165b960517fe6d8a2f7f16d23"}, {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1a65b503d008f066e994f34f456e0647e5ceb34cfcec5ad180b1b44020ad4970"},
{file = "grpcio-1.67.0-cp311-cp311-win32.whl", hash = "sha256:7fc1d2b9fd549264ae585026b266ac2db53735510a207381be509c315b4af4e8"}, {file = "grpcio-1.67.1-cp311-cp311-win32.whl", hash = "sha256:e29ca27bec8e163dca0c98084040edec3bc49afd10f18b412f483cc68c712744"},
{file = "grpcio-1.67.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac11ecb34a86b831239cc38245403a8de25037b448464f95c3315819e7519772"}, {file = "grpcio-1.67.1-cp311-cp311-win_amd64.whl", hash = "sha256:786a5b18544622bfb1e25cc08402bd44ea83edfb04b93798d85dca4d1a0b5be5"},
{file = "grpcio-1.67.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:227316b5631260e0bef8a3ce04fa7db4cc81756fea1258b007950b6efc90c05d"}, {file = "grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953"},
{file = "grpcio-1.67.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d90cfdafcf4b45a7a076e3e2a58e7bc3d59c698c4f6470b0bb13a4d869cf2273"}, {file = "grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb"},
{file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:77196216d5dd6f99af1c51e235af2dd339159f657280e65ce7e12c1a8feffd1d"}, {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0"},
{file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c05a26a0f7047f720da41dc49406b395c1470eef44ff7e2c506a47ac2c0591"}, {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af"},
{file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3840994689cc8cbb73d60485c594424ad8adb56c71a30d8948d6453083624b52"}, {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e"},
{file = "grpcio-1.67.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:5a1e03c3102b6451028d5dc9f8591131d6ab3c8a0e023d94c28cb930ed4b5f81"}, {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75"},
{file = "grpcio-1.67.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:682968427a63d898759474e3b3178d42546e878fdce034fd7474ef75143b64e3"}, {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38"},
{file = "grpcio-1.67.0-cp312-cp312-win32.whl", hash = "sha256:d01793653248f49cf47e5695e0a79805b1d9d4eacef85b310118ba1dfcd1b955"}, {file = "grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78"},
{file = "grpcio-1.67.0-cp312-cp312-win_amd64.whl", hash = "sha256:985b2686f786f3e20326c4367eebdaed3e7aa65848260ff0c6644f817042cb15"}, {file = "grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc"},
{file = "grpcio-1.67.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:8c9a35b8bc50db35ab8e3e02a4f2a35cfba46c8705c3911c34ce343bd777813a"}, {file = "grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b"},
{file = "grpcio-1.67.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:42199e704095b62688998c2d84c89e59a26a7d5d32eed86d43dc90e7a3bd04aa"}, {file = "grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1"},
{file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:c4c425f440fb81f8d0237c07b9322fc0fb6ee2b29fbef5f62a322ff8fcce240d"}, {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af"},
{file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:323741b6699cd2b04a71cb38f502db98f90532e8a40cb675393d248126a268af"}, {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955"},
{file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:662c8e105c5e5cee0317d500eb186ed7a93229586e431c1bf0c9236c2407352c"}, {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8"},
{file = "grpcio-1.67.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f6bd2ab135c64a4d1e9e44679a616c9bc944547357c830fafea5c3caa3de5153"}, {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62"},
{file = "grpcio-1.67.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:2f55c1e0e2ae9bdd23b3c63459ee4c06d223b68aeb1961d83c48fb63dc29bc03"}, {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb"},
{file = "grpcio-1.67.0-cp313-cp313-win32.whl", hash = "sha256:fd6bc27861e460fe28e94226e3673d46e294ca4673d46b224428d197c5935e69"}, {file = "grpcio-1.67.1-cp313-cp313-win32.whl", hash = "sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121"},
{file = "grpcio-1.67.0-cp313-cp313-win_amd64.whl", hash = "sha256:cf51d28063338608cd8d3cd64677e922134837902b70ce00dad7f116e3998210"}, {file = "grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba"},
{file = "grpcio-1.67.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:7f200aca719c1c5dc72ab68be3479b9dafccdf03df530d137632c534bb6f1ee3"}, {file = "grpcio-1.67.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:178f5db771c4f9a9facb2ab37a434c46cb9be1a75e820f187ee3d1e7805c4f65"},
{file = "grpcio-1.67.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0892dd200ece4822d72dd0952f7112c542a487fc48fe77568deaaa399c1e717d"}, {file = "grpcio-1.67.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0f3e49c738396e93b7ba9016e153eb09e0778e776df6090c1b8c91877cc1c426"},
{file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:f4d613fbf868b2e2444f490d18af472ccb47660ea3df52f068c9c8801e1f3e85"}, {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:24e8a26dbfc5274d7474c27759b54486b8de23c709d76695237515bc8b5baeab"},
{file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c69bf11894cad9da00047f46584d5758d6ebc9b5950c0dc96fec7e0bce5cde9"}, {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b6c16489326d79ead41689c4b84bc40d522c9a7617219f4ad94bc7f448c5085"},
{file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9bca3ca0c5e74dea44bf57d27e15a3a3996ce7e5780d61b7c72386356d231db"}, {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e6a4dcf5af7bbc36fd9f81c9f372e8ae580870a9e4b6eafe948cd334b81cf3"},
{file = "grpcio-1.67.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:014dfc020e28a0d9be7e93a91f85ff9f4a87158b7df9952fe23cc42d29d31e1e"}, {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:95b5f2b857856ed78d72da93cd7d09b6db8ef30102e5e7fe0961fe4d9f7d48e8"},
{file = "grpcio-1.67.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d4ea4509d42c6797539e9ec7496c15473177ce9abc89bc5c71e7abe50fc25737"}, {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b49359977c6ec9f5d0573ea4e0071ad278ef905aa74e420acc73fd28ce39e9ce"},
{file = "grpcio-1.67.0-cp38-cp38-win32.whl", hash = "sha256:9d75641a2fca9ae1ae86454fd25d4c298ea8cc195dbc962852234d54a07060ad"}, {file = "grpcio-1.67.1-cp38-cp38-win32.whl", hash = "sha256:f5b76ff64aaac53fede0cc93abf57894ab2a7362986ba22243d06218b93efe46"},
{file = "grpcio-1.67.0-cp38-cp38-win_amd64.whl", hash = "sha256:cff8e54d6a463883cda2fab94d2062aad2f5edd7f06ae3ed030f2a74756db365"}, {file = "grpcio-1.67.1-cp38-cp38-win_amd64.whl", hash = "sha256:804c6457c3cd3ec04fe6006c739579b8d35c86ae3298ffca8de57b493524b771"},
{file = "grpcio-1.67.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:62492bd534979e6d7127b8a6b29093161a742dee3875873e01964049d5250a74"}, {file = "grpcio-1.67.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:a25bdea92b13ff4d7790962190bf6bf5c4639876e01c0f3dda70fc2769616335"},
{file = "grpcio-1.67.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eef1dce9d1a46119fd09f9a992cf6ab9d9178b696382439446ca5f399d7b96fe"}, {file = "grpcio-1.67.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc491ae35a13535fd9196acb5afe1af37c8237df2e54427be3eecda3653127e"},
{file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f623c57a5321461c84498a99dddf9d13dac0e40ee056d884d6ec4ebcab647a78"}, {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:85f862069b86a305497e74d0dc43c02de3d1d184fc2c180993aa8aa86fbd19b8"},
{file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54d16383044e681f8beb50f905249e4e7261dd169d4aaf6e52eab67b01cbbbe2"}, {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec74ef02010186185de82cc594058a3ccd8d86821842bbac9873fd4a2cf8be8d"},
{file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2a44e572fb762c668e4812156b81835f7aba8a721b027e2d4bb29fb50ff4d33"}, {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01f616a964e540638af5130469451cf580ba8c7329f45ca998ab66e0c7dcdb04"},
{file = "grpcio-1.67.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:391df8b0faac84d42f5b8dfc65f5152c48ed914e13c522fd05f2aca211f8bfad"}, {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:299b3d8c4f790c6bcca485f9963b4846dd92cf6f1b65d3697145d005c80f9fe8"},
{file = "grpcio-1.67.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfd9306511fdfc623a1ba1dc3bc07fbd24e6cfbe3c28b4d1e05177baa2f99617"}, {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:60336bff760fbb47d7e86165408126f1dded184448e9a4c892189eb7c9d3f90f"},
{file = "grpcio-1.67.0-cp39-cp39-win32.whl", hash = "sha256:30d47dbacfd20cbd0c8be9bfa52fdb833b395d4ec32fe5cff7220afc05d08571"}, {file = "grpcio-1.67.1-cp39-cp39-win32.whl", hash = "sha256:5ed601c4c6008429e3d247ddb367fe8c7259c355757448d7c1ef7bd4a6739e8e"},
{file = "grpcio-1.67.0-cp39-cp39-win_amd64.whl", hash = "sha256:f55f077685f61f0fbd06ea355142b71e47e4a26d2d678b3ba27248abfe67163a"}, {file = "grpcio-1.67.1-cp39-cp39-win_amd64.whl", hash = "sha256:5db70d32d6703b89912af16d6d45d78406374a8b8ef0d28140351dd0ec610e98"},
{file = "grpcio-1.67.0.tar.gz", hash = "sha256:e090b2553e0da1c875449c8e75073dd4415dd71c9bde6a406240fdf4c0ee467c"}, {file = "grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732"},
] ]
[package.extras] [package.extras]
protobuf = ["grpcio-tools (>=1.67.0)"] protobuf = ["grpcio-tools (>=1.67.1)"]
[[package]] [[package]]
name = "h11" name = "h11"
@ -1487,13 +1487,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]] [[package]]
name = "huggingface-hub" name = "huggingface-hub"
version = "0.26.1" version = "0.26.2"
description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
optional = false optional = false
python-versions = ">=3.8.0" python-versions = ">=3.8.0"
files = [ files = [
{file = "huggingface_hub-0.26.1-py3-none-any.whl", hash = "sha256:5927a8fc64ae68859cd954b7cc29d1c8390a5e15caba6d3d349c973be8fdacf3"}, {file = "huggingface_hub-0.26.2-py3-none-any.whl", hash = "sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46"},
{file = "huggingface_hub-0.26.1.tar.gz", hash = "sha256:414c0d9b769eecc86c70f9d939d0f48bb28e8461dd1130021542eff0212db890"}, {file = "huggingface_hub-0.26.2.tar.gz", hash = "sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b"},
] ]
[package.dependencies] [package.dependencies]
@ -1660,13 +1660,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
[[package]] [[package]]
name = "ipython" name = "ipython"
version = "8.28.0" version = "8.29.0"
description = "IPython: Productive Interactive Computing" description = "IPython: Productive Interactive Computing"
optional = false optional = false
python-versions = ">=3.10" python-versions = ">=3.10"
files = [ files = [
{file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"}, {file = "ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8"},
{file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"}, {file = "ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb"},
] ]
[package.dependencies] [package.dependencies]
@ -2031,13 +2031,13 @@ test-ui = ["calysto-bash"]
[[package]] [[package]]
name = "keyring" name = "keyring"
version = "25.4.1" version = "25.5.0"
description = "Store and access your passwords safely." description = "Store and access your passwords safely."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "keyring-25.4.1-py3-none-any.whl", hash = "sha256:5426f817cf7f6f007ba5ec722b1bcad95a75b27d780343772ad76b17cb47b0bf"}, {file = "keyring-25.5.0-py3-none-any.whl", hash = "sha256:e67f8ac32b04be4714b42fe84ce7dad9c40985b9ca827c592cc303e7c26d9741"},
{file = "keyring-25.4.1.tar.gz", hash = "sha256:b07ebc55f3e8ed86ac81dd31ef14e81ace9dd9c3d4b5d77a6e9a2016d0d71a1b"}, {file = "keyring-25.5.0.tar.gz", hash = "sha256:4c753b3ec91717fe713c4edd522d625889d8973a349b0e582622f49766de58e6"},
] ]
[package.dependencies] [package.dependencies]
@ -4437,13 +4437,13 @@ testutils = ["gitpython (>3)"]
[[package]] [[package]]
name = "pymdown-extensions" name = "pymdown-extensions"
version = "10.11.2" version = "10.12"
description = "Extension pack for Python Markdown." description = "Extension pack for Python Markdown."
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"}, {file = "pymdown_extensions-10.12-py3-none-any.whl", hash = "sha256:49f81412242d3527b8b4967b990df395c89563043bc51a3d2d7d500e52123b77"},
{file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"}, {file = "pymdown_extensions-10.12.tar.gz", hash = "sha256:b0ee1e0b2bef1071a47891ab17003bfe5bf824a398e13f49f8ed653b699369a7"},
] ]
[package.dependencies] [package.dependencies]
@ -4455,13 +4455,13 @@ extra = ["pygments (>=2.12)"]
[[package]] [[package]]
name = "pymilvus" name = "pymilvus"
version = "2.4.8" version = "2.4.9"
description = "Python Sdk for Milvus" description = "Python Sdk for Milvus"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "pymilvus-2.4.8-py3-none-any.whl", hash = "sha256:5824f8ef4ecb14cfd4b205bf976aa52576c3a83c3cd848d21c8f5f9bb99b29e1"}, {file = "pymilvus-2.4.9-py3-none-any.whl", hash = "sha256:45313607d2c164064bdc44e0f933cb6d6afa92e9efcc7f357c5240c57db58fbe"},
{file = "pymilvus-2.4.8.tar.gz", hash = "sha256:0ddd18a060635fc8f1d1ab5635d9cc340ef29a97783b73db186df6334fa31ee2"}, {file = "pymilvus-2.4.9.tar.gz", hash = "sha256:0937663700007c23a84cfc0656160b301f6ff9247aaec4c96d599a6b43572136"},
] ]
[package.dependencies] [package.dependencies]
@ -5712,23 +5712,23 @@ train = ["accelerate (>=0.20.3)", "datasets"]
[[package]] [[package]]
name = "setuptools" name = "setuptools"
version = "75.2.0" version = "75.3.0"
description = "Easily download, build, install, upgrade, and uninstall Python packages" description = "Easily download, build, install, upgrade, and uninstall Python packages"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"},
{file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"},
] ]
[package.extras] [package.extras]
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"]
core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"]
cover = ["pytest-cov"] cover = ["pytest-cov"]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"]
enabler = ["pytest-enabler (>=2.2)"] enabler = ["pytest-enabler (>=2.2)"]
test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"]
type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"]
[[package]] [[package]]
name = "shapely" name = "shapely"
@ -5995,13 +5995,13 @@ zarr = ["fsspec", "zarr"]
[[package]] [[package]]
name = "tinycss2" name = "tinycss2"
version = "1.3.0" version = "1.4.0"
description = "A tiny CSS parser" description = "A tiny CSS parser"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "tinycss2-1.3.0-py3-none-any.whl", hash = "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7"}, {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"},
{file = "tinycss2-1.3.0.tar.gz", hash = "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d"}, {file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"},
] ]
[package.dependencies] [package.dependencies]
@ -6373,13 +6373,13 @@ files = [
[[package]] [[package]]
name = "tqdm" name = "tqdm"
version = "4.66.5" version = "4.66.6"
description = "Fast, Extensible Progress Meter" description = "Fast, Extensible Progress Meter"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"},
{file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"},
] ]
[package.dependencies] [package.dependencies]
@ -6408,13 +6408,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,
[[package]] [[package]]
name = "transformers" name = "transformers"
version = "4.45.2" version = "4.46.0"
description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
optional = false optional = false
python-versions = ">=3.8.0" python-versions = ">=3.8.0"
files = [ files = [
{file = "transformers-4.45.2-py3-none-any.whl", hash = "sha256:c551b33660cfc815bae1f9f097ecfd1e65be623f13c6ee0dda372bd881460210"}, {file = "transformers-4.46.0-py3-none-any.whl", hash = "sha256:e161268ae8bee315eb9e9b4c0b27f1bd6980f91e0fc292d75249193d339704c0"},
{file = "transformers-4.45.2.tar.gz", hash = "sha256:72bc390f6b203892561f05f86bbfaa0e234aab8e927a83e62b9d92ea7e3ae101"}, {file = "transformers-4.46.0.tar.gz", hash = "sha256:3a9e2eb537094db11c3652334d281afa4766c0e5091c4dcdb454e9921bb0d2b7"},
] ]
[package.dependencies] [package.dependencies]
@ -6432,13 +6432,13 @@ tqdm = ">=4.27"
[package.extras] [package.extras]
accelerate = ["accelerate (>=0.26.0)"] accelerate = ["accelerate (>=0.26.0)"]
agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"] agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"] all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision"]
audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"] audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
benchmark = ["optimum-benchmark (>=0.3.0)"] benchmark = ["optimum-benchmark (>=0.3.0)"]
codecarbon = ["codecarbon (==1.2.0)"] codecarbon = ["codecarbon (==1.2.0)"]
deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"] deepspeed = ["accelerate (>=0.26.0)", "deepspeed (>=0.9.3)"]
deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"] deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.26.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "nltk (<=3.8.1)", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", 
"rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "scipy (<1.13.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.20,<0.21)", "urllib3 (<2.0.0)"] dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1,<0.14.0)", "libcst", "librosa", "nltk (<=3.8.1)", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>2.9,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.20,<0.21)", "urllib3 (<2.0.0)"]
dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers (>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"] dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.26.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "libcst", "librosa", "nltk (<=3.8.1)", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-rich", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rich", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.5.1)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm (<=0.9.16)", "tokenizers 
(>=0.20,<0.21)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"] flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)", "scipy (<1.13.0)"]
@ -6472,7 +6472,7 @@ torch = ["accelerate (>=0.26.0)", "torch"]
torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"] torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"] torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.20,<0.21)", "torch", "tqdm (>=4.27)"] torchhub = ["filelock", "huggingface-hub (>=0.23.2,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.20,<0.21)", "torch", "tqdm (>=4.27)"]
video = ["av (==9.2.0)", "decord (==0.6.0)"] video = ["av (==9.2.0)"]
vision = ["Pillow (>=10.0.1,<=15.0)"] vision = ["Pillow (>=10.0.1,<=15.0)"]
[[package]] [[package]]
@ -6713,13 +6713,13 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]] [[package]]
name = "virtualenv" name = "virtualenv"
version = "20.27.0" version = "20.27.1"
description = "Virtual Python Environment builder" description = "Virtual Python Environment builder"
optional = false optional = false
python-versions = ">=3.8" python-versions = ">=3.8"
files = [ files = [
{file = "virtualenv-20.27.0-py3-none-any.whl", hash = "sha256:44a72c29cceb0ee08f300b314848c86e57bf8d1f13107a5e671fb9274138d655"}, {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"},
{file = "virtualenv-20.27.0.tar.gz", hash = "sha256:2ca56a68ed615b8fe4326d11a0dca5dfbe8fd68510fb6c6349163bed3c15f2b2"}, {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"},
] ]
[package.dependencies] [package.dependencies]
@ -7044,93 +7044,93 @@ files = [
[[package]] [[package]]
name = "yarl" name = "yarl"
version = "1.16.0" version = "1.17.0"
description = "Yet another URL library" description = "Yet another URL library"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
files = [ files = [
{file = "yarl-1.16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:32468f41242d72b87ab793a86d92f885355bcf35b3355aa650bfa846a5c60058"}, {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d8715edfe12eee6f27f32a3655f38d6c7410deb482158c0b7d4b7fad5d07628"},
{file = "yarl-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:234f3a3032b505b90e65b5bc6652c2329ea7ea8855d8de61e1642b74b4ee65d2"}, {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1803bf2a7a782e02db746d8bd18f2384801bc1d108723840b25e065b116ad726"},
{file = "yarl-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a0296040e5cddf074c7f5af4a60f3fc42c0237440df7bcf5183be5f6c802ed5"}, {file = "yarl-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e66589110e20c2951221a938fa200c7aa134a8bdf4e4dc97e6b21539ff026d4"},
{file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de6c14dd7c7c0badba48157474ea1f03ebee991530ba742d381b28d4f314d6f3"}, {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7069d411cfccf868e812497e0ec4acb7c7bf8d684e93caa6c872f1e6f5d1664d"},
{file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b140e532fe0266003c936d017c1ac301e72ee4a3fd51784574c05f53718a55d8"}, {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbf70ba16118db3e4b0da69dcde9d4d4095d383c32a15530564c283fa38a7c52"},
{file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:019f5d58093402aa8f6661e60fd82a28746ad6d156f6c5336a70a39bd7b162b9"}, {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0bc53cc349675b32ead83339a8de79eaf13b88f2669c09d4962322bb0f064cbc"},
{file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c42998fd1cbeb53cd985bff0e4bc25fbe55fd6eb3a545a724c1012d69d5ec84"}, {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6aa18a402d1c80193ce97c8729871f17fd3e822037fbd7d9b719864018df746"},
{file = "yarl-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c7c30fb38c300fe8140df30a046a01769105e4cf4282567a29b5cdb635b66c4"}, {file = "yarl-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d89c5bc701861cfab357aa0cd039bc905fe919997b8c312b4b0c358619c38d4d"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e49e0fd86c295e743fd5be69b8b0712f70a686bc79a16e5268386c2defacaade"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b728bdf38ca58f2da1d583e4af4ba7d4cd1a58b31a363a3137a8159395e7ecc7"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:b9ca7b9147eb1365c8bab03c003baa1300599575effad765e0b07dd3501ea9af"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:5542e57dc15d5473da5a39fbde14684b0cc4301412ee53cbab677925e8497c11"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:27e11db3f1e6a51081a981509f75617b09810529de508a181319193d320bc5c7"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e564b57e5009fb150cb513804d7e9e9912fee2e48835638f4f47977f88b4a39c"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8994c42f4ca25df5380ddf59f315c518c81df6a68fed5bb0c159c6cb6b92f120"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:eb3c4cff524b4c1c1dba3a6da905edb1dfd2baf6f55f18a58914bbb2d26b59e1"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:542fa8e09a581bcdcbb30607c7224beff3fdfb598c798ccd28a8184ffc18b7eb"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:05e13f389038842da930d439fbed63bdce3f7644902714cb68cf527c971af804"},
{file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2bd6a51010c7284d191b79d3b56e51a87d8e1c03b0902362945f15c3d50ed46b"}, {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:153c38ee2b4abba136385af4467459c62d50f2a3f4bde38c7b99d43a20c143ef"},
{file = "yarl-1.16.0-cp310-cp310-win32.whl", hash = "sha256:178ccb856e265174a79f59721031060f885aca428983e75c06f78aa24b91d929"}, {file = "yarl-1.17.0-cp310-cp310-win32.whl", hash = "sha256:4065b4259d1ae6f70fd9708ffd61e1c9c27516f5b4fae273c41028afcbe3a094"},
{file = "yarl-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe8bba2545427418efc1929c5c42852bdb4143eb8d0a46b09de88d1fe99258e7"}, {file = "yarl-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:abf366391a02a8335c5c26163b5fe6f514cc1d79e74d8bf3ffab13572282368e"},
{file = "yarl-1.16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d8643975a0080f361639787415a038bfc32d29208a4bf6b783ab3075a20b1ef3"}, {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19a4fe0279626c6295c5b0c8c2bb7228319d2e985883621a6e87b344062d8135"},
{file = "yarl-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:676d96bafc8c2d0039cea0cd3fd44cee7aa88b8185551a2bb93354668e8315c2"}, {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cadd0113f4db3c6b56868d6a19ca6286f5ccfa7bc08c27982cf92e5ed31b489a"},
{file = "yarl-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d9525f03269e64310416dbe6c68d3b23e5d34aaa8f47193a1c45ac568cecbc49"}, {file = "yarl-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60d6693eef43215b1ccfb1df3f6eae8db30a9ff1e7989fb6b2a6f0b468930ee8"},
{file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b37d5ec034e668b22cf0ce1074d6c21fd2a08b90d11b1b73139b750a8b0dd97"}, {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb8bf3843e1fa8cf3fe77813c512818e57368afab7ebe9ef02446fe1a10b492"},
{file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f32c4cb7386b41936894685f6e093c8dfaf0960124d91fe0ec29fe439e201d0"}, {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2a5b35fd1d8d90443e061d0c8669ac7600eec5c14c4a51f619e9e105b136715"},
{file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b8e265a0545637492a7e12fd7038370d66c9375a61d88c5567d0e044ded9202"}, {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5bf17b32f392df20ab5c3a69d37b26d10efaa018b4f4e5643c7520d8eee7ac7"},
{file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:789a3423f28a5fff46fbd04e339863c169ece97c827b44de16e1a7a42bc915d2"}, {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f51b529b958cd06e78158ff297a8bf57b4021243c179ee03695b5dbf9cb6e1"},
{file = "yarl-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1d1f45e3e8d37c804dca99ab3cf4ab3ed2e7a62cd82542924b14c0a4f46d243"}, {file = "yarl-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fcaa06bf788e19f913d315d9c99a69e196a40277dc2c23741a1d08c93f4d430"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:621280719c4c5dad4c1391160a9b88925bb8b0ff6a7d5af3224643024871675f"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:32f3ee19ff0f18a7a522d44e869e1ebc8218ad3ae4ebb7020445f59b4bbe5897"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed097b26f18a1f5ff05f661dc36528c5f6735ba4ce8c9645e83b064665131349"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a4fb69a81ae2ec2b609574ae35420cf5647d227e4d0475c16aa861dd24e840b0"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2f1fe2b2e3ee418862f5ebc0c0083c97f6f6625781382f828f6d4e9b614eba9b"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7bacc8b77670322132a1b2522c50a1f62991e2f95591977455fd9a398b4e678d"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:87dd10bc0618991c66cee0cc65fa74a45f4ecb13bceec3c62d78ad2e42b27a16"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:437bf6eb47a2d20baaf7f6739895cb049e56896a5ffdea61a4b25da781966e8b"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:4199db024b58a8abb2cfcedac7b1292c3ad421684571aeb622a02f242280e8d6"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30534a03c87484092080e3b6e789140bd277e40f453358900ad1f0f2e61fc8ec"},
{file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:99a9dcd4b71dd5f5f949737ab3f356cfc058c709b4f49833aeffedc2652dac56"}, {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b30df4ff98703649915144be6f0df3b16fd4870ac38a09c56d5d9e54ff2d5f96"},
{file = "yarl-1.16.0-cp311-cp311-win32.whl", hash = "sha256:a9394c65ae0ed95679717d391c862dece9afacd8fa311683fc8b4362ce8a410c"}, {file = "yarl-1.17.0-cp311-cp311-win32.whl", hash = "sha256:263b487246858e874ab53e148e2a9a0de8465341b607678106829a81d81418c6"},
{file = "yarl-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:5b9101f528ae0f8f65ac9d64dda2bb0627de8a50344b2f582779f32fda747c1d"}, {file = "yarl-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:07055a9e8b647a362e7d4810fe99d8f98421575e7d2eede32e008c89a65a17bd"},
{file = "yarl-1.16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4ffb7c129707dd76ced0a4a4128ff452cecf0b0e929f2668ea05a371d9e5c104"}, {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:84095ab25ba69a8fa3fb4936e14df631b8a71193fe18bd38be7ecbe34d0f5512"},
{file = "yarl-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1a5e9d8ce1185723419c487758d81ac2bde693711947032cce600ca7c9cda7d6"}, {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02608fb3f6df87039212fc746017455ccc2a5fc96555ee247c45d1e9f21f1d7b"},
{file = "yarl-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d743e3118b2640cef7768ea955378c3536482d95550222f908f392167fe62059"}, {file = "yarl-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13468d291fe8c12162b7cf2cdb406fe85881c53c9e03053ecb8c5d3523822cd9"},
{file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26768342f256e6e3c37533bf9433f5f15f3e59e3c14b2409098291b3efaceacb"}, {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8da3f8f368fb7e2f052fded06d5672260c50b5472c956a5f1bd7bf474ae504ab"},
{file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1b0796168b953bca6600c5f97f5ed407479889a36ad7d17183366260f29a6b9"}, {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec0507ab6523980bed050137007c76883d941b519aca0e26d4c1ec1f297dd646"},
{file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:858728086914f3a407aa7979cab743bbda1fe2bdf39ffcd991469a370dd7414d"}, {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08fc76df7fd8360e9ff30e6ccc3ee85b8dbd6ed5d3a295e6ec62bcae7601b932"},
{file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5570e6d47bcb03215baf4c9ad7bf7c013e56285d9d35013541f9ac2b372593e7"}, {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d522f390686acb6bab2b917dd9ca06740c5080cd2eaa5aef8827b97e967319d"},
{file = "yarl-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66ea8311422a7ba1fc79b4c42c2baa10566469fe5a78500d4e7754d6e6db8724"}, {file = "yarl-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:147c527a80bb45b3dcd6e63401af8ac574125d8d120e6afe9901049286ff64ef"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:649bddcedee692ee8a9b7b6e38582cb4062dc4253de9711568e5620d8707c2a3"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:24cf43bcd17a0a1f72284e47774f9c60e0bf0d2484d5851f4ddf24ded49f33c6"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:3a91654adb7643cb21b46f04244c5a315a440dcad63213033826549fa2435f71"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c28a44b9e0fba49c3857360e7ad1473fc18bc7f6659ca08ed4f4f2b9a52c75fa"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b439cae82034ade094526a8f692b9a2b5ee936452de5e4c5f0f6c48df23f8604"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:350cacb2d589bc07d230eb995d88fcc646caad50a71ed2d86df533a465a4e6e1"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:571f781ae8ac463ce30bacebfaef2c6581543776d5970b2372fbe31d7bf31a07"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fd1ab1373274dea1c6448aee420d7b38af163b5c4732057cd7ee9f5454efc8b1"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:aa7943f04f36d6cafc0cf53ea89824ac2c37acbdb4b316a654176ab8ffd0f968"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4934e0f96dadc567edc76d9c08181633c89c908ab5a3b8f698560124167d9488"},
{file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1a5cf32539373ff39d97723e39a9283a7277cbf1224f7aef0c56c9598b6486c3"}, {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8d0a278170d75c88e435a1ce76557af6758bfebc338435b2eba959df2552163e"},
{file = "yarl-1.16.0-cp312-cp312-win32.whl", hash = "sha256:a5b6c09b9b4253d6a208b0f4a2f9206e511ec68dce9198e0fbec4f160137aa67"}, {file = "yarl-1.17.0-cp312-cp312-win32.whl", hash = "sha256:61584f33196575a08785bb56db6b453682c88f009cd9c6f338a10f6737ce419f"},
{file = "yarl-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:1208ca14eed2fda324042adf8d6c0adf4a31522fa95e0929027cd487875f0240"}, {file = "yarl-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:9987a439ad33a7712bd5bbd073f09ad10d38640425fa498ecc99d8aa064f8fc4"},
{file = "yarl-1.16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5ace0177520bd4caa99295a9b6fb831d0e9a57d8e0501a22ffaa61b4c024283"}, {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8deda7b8eb15a52db94c2014acdc7bdd14cb59ec4b82ac65d2ad16dc234a109e"},
{file = "yarl-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7118bdb5e3ed81acaa2095cba7ec02a0fe74b52a16ab9f9ac8e28e53ee299732"}, {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56294218b348dcbd3d7fce0ffd79dd0b6c356cb2a813a1181af730b7c40de9e7"},
{file = "yarl-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38fec8a2a94c58bd47c9a50a45d321ab2285ad133adefbbadf3012c054b7e656"}, {file = "yarl-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fab91292f51c884b290ebec0b309a64a5318860ccda0c4940e740425a67b6b7"},
{file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8791d66d81ee45866a7bb15a517b01a2bcf583a18ebf5d72a84e6064c417e64b"}, {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cf93fa61ff4d9c7d40482ce1a2c9916ca435e34a1b8451e17f295781ccc034f"},
{file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cf936ba67bc6c734f3aa1c01391da74ab7fc046a9f8bbfa230b8393b90cf472"}, {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:261be774a0d71908c8830c33bacc89eef15c198433a8cc73767c10eeeb35a7d0"},
{file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1aab176dd55b59f77a63b27cffaca67d29987d91a5b615cbead41331e6b7428"}, {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deec9693b67f6af856a733b8a3e465553ef09e5e8ead792f52c25b699b8f9e6e"},
{file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995d0759004c08abd5d1b81300a91d18c8577c6389300bed1c7c11675105a44d"}, {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c804b07622ba50a765ca7fb8145512836ab65956de01307541def869e4a456c9"},
{file = "yarl-1.16.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bc22e00edeb068f71967ab99081e9406cd56dbed864fc3a8259442999d71552"}, {file = "yarl-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d013a7c9574e98c14831a8f22d27277688ec3b2741d0188ac01a910b009987a"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:35b4f7842154176523e0a63c9b871168c69b98065d05a4f637fce342a6a2693a"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e2cfcba719bd494c7413dcf0caafb51772dec168c7c946e094f710d6aa70494e"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:7ace71c4b7a0c41f317ae24be62bb61e9d80838d38acb20e70697c625e71f120"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:c068aba9fc5b94dfae8ea1cedcbf3041cd4c64644021362ffb750f79837e881f"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8f639e3f5795a6568aa4f7d2ac6057c757dcd187593679f035adbf12b892bb00"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3616df510ffac0df3c9fa851a40b76087c6c89cbcea2de33a835fc80f9faac24"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e8be3aff14f0120ad049121322b107f8a759be76a6a62138322d4c8a337a9e2c"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:755d6176b442fba9928a4df787591a6a3d62d4969f05c406cad83d296c5d4e05"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:122d8e7986043d0549e9eb23c7fd23be078be4b70c9eb42a20052b3d3149c6f2"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c18f6e708d1cf9ff5b1af026e697ac73bea9cb70ee26a2b045b112548579bed2"},
{file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0fd9c227990f609c165f56b46107d0bc34553fe0387818c42c02f77974402c36"}, {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b937c216b6dee8b858c6afea958de03c5ff28406257d22b55c24962a2baf6fd"},
{file = "yarl-1.16.0-cp313-cp313-win32.whl", hash = "sha256:595ca5e943baed31d56b33b34736461a371c6ea0038d3baec399949dd628560b"}, {file = "yarl-1.17.0-cp313-cp313-win32.whl", hash = "sha256:d0131b14cb545c1a7bd98f4565a3e9bdf25a1bd65c83fc156ee5d8a8499ec4a3"},
{file = "yarl-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:921b81b8d78f0e60242fb3db615ea3f368827a76af095d5a69f1c3366db3f596"}, {file = "yarl-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:01c96efa4313c01329e88b7e9e9e1b2fc671580270ddefdd41129fa8d0db7696"},
{file = "yarl-1.16.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab2b2ac232110a1fdb0d3ffcd087783edd3d4a6ced432a1bf75caf7b7be70916"}, {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0d44f67e193f0a7acdf552ecb4d1956a3a276c68e7952471add9f93093d1c30d"},
{file = "yarl-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7f8713717a09acbfee7c47bfc5777e685539fefdd34fa72faf504c8be2f3df4e"}, {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:16ea0aa5f890cdcb7ae700dffa0397ed6c280840f637cd07bffcbe4b8d68b985"},
{file = "yarl-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cdcffe1dbcb4477d2b4202f63cd972d5baa155ff5a3d9e35801c46a415b7f71a"}, {file = "yarl-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cf5469dc7dcfa65edf5cc3a6add9f84c5529c6b556729b098e81a09a92e60e51"},
{file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a91217208306d82357c67daeef5162a41a28c8352dab7e16daa82e3718852a7"}, {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e662bf2f6e90b73cf2095f844e2bc1fda39826472a2aa1959258c3f2a8500a2f"},
{file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ab3ed42c78275477ea8e917491365e9a9b69bb615cb46169020bd0aa5e2d6d3"}, {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8260e88f1446904ba20b558fa8ce5d0ab9102747238e82343e46d056d7304d7e"},
{file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:707ae579ccb3262dfaef093e202b4c3fb23c3810e8df544b1111bd2401fd7b09"}, {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dc16477a4a2c71e64c5d3d15d7ae3d3a6bb1e8b955288a9f73c60d2a391282f"},
{file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7a852d1cd0b8d8b37fc9d7f8581152add917a98cfe2ea6e241878795f917ae"}, {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46027e326cecd55e5950184ec9d86c803f4f6fe4ba6af9944a0e537d643cdbe0"},
{file = "yarl-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3f1cc3d3d4dc574bebc9b387f6875e228ace5748a7c24f49d8f01ac1bc6c31b"}, {file = "yarl-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc95e46c92a2b6f22e70afe07e34dbc03a4acd07d820204a6938798b16f4014f"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5ff96da263740779b0893d02b718293cc03400c3a208fc8d8cd79d9b0993e532"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:16ca76c7ac9515320cd09d6cc083d8d13d1803f6ebe212b06ea2505fd66ecff8"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:3d375a19ba2bfe320b6d873f3fb165313b002cef8b7cc0a368ad8b8a57453837"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:eb1a5b97388f2613f9305d78a3473cdf8d80c7034e554d8199d96dcf80c62ac4"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:62c7da0ad93a07da048b500514ca47b759459ec41924143e2ddb5d7e20fd3db5"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:41fd5498975418cdc34944060b8fbeec0d48b2741068077222564bea68daf5a6"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:147b0fcd0ee33b4b5f6edfea80452d80e419e51b9a3f7a96ce98eaee145c1581"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:146ca582ed04a5664ad04b0e0603934281eaab5c0115a5a46cce0b3c061a56a1"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:504e1fe1cc4f170195320eb033d2b0ccf5c6114ce5bf2f617535c01699479bca"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:6abb8c06107dbec97481b2392dafc41aac091a5d162edf6ed7d624fe7da0587a"},
{file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bdcf667a5dec12a48f669e485d70c54189f0639c2157b538a4cffd24a853624f"}, {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d14be4613dd4f96c25feb4bd8c0d8ce0f529ab0ae555a17df5789e69d8ec0c5"},
{file = "yarl-1.16.0-cp39-cp39-win32.whl", hash = "sha256:e9951afe6557c75a71045148890052cb942689ee4c9ec29f5436240e1fcc73b7"}, {file = "yarl-1.17.0-cp39-cp39-win32.whl", hash = "sha256:174d6a6cad1068f7850702aad0c7b1bca03bcac199ca6026f84531335dfc2646"},
{file = "yarl-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:7d7aaa8ff95d0840e289423e7dc35696c2b058d635f945bf05b5cd633146b027"}, {file = "yarl-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:6af417ca2c7349b101d3fd557ad96b4cd439fdb6ab0d288e3f64a068eea394d0"},
{file = "yarl-1.16.0-py3-none-any.whl", hash = "sha256:e6980a558d8461230c457218bd6c92dfc1d10205548215c2c21d79dc8d0a96f3"}, {file = "yarl-1.17.0-py3-none-any.whl", hash = "sha256:62dd42bb0e49423f4dd58836a04fcf09c80237836796025211bbe913f1524993"},
{file = "yarl-1.16.0.tar.gz", hash = "sha256:b6f687ced5510a9a2474bbae96a4352e5ace5fa34dc44a217b0537fec1db00b4"}, {file = "yarl-1.17.0.tar.gz", hash = "sha256:d3f13583f378930377e02002b4085a3d025b00402d5a80911726d43a67911cd9"},
] ]
[package.dependencies] [package.dependencies]

View File

@ -2,9 +2,6 @@ import glob
import os import os
from pathlib import Path from pathlib import Path
import pytest
from docling_core.types.doc import BoundingBox
from docling.backend.asciidoc_backend import AsciiDocBackend from docling.backend.asciidoc_backend import AsciiDocBackend
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import InputDocument from docling.datamodel.document import InputDocument

View File

@ -1,8 +1,5 @@
from pathlib import Path from pathlib import Path
import yaml
from docling_core.types.doc import DoclingDocument
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import ConversionResult from docling.datamodel.document import ConversionResult