fix: set page number using 1-based indexing (#22)
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
parent
e102827753
commit
d2d9543415
@@ -56,7 +56,7 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate
|
||||
|
||||
### Convert a batch of documents
|
||||
|
||||
For an example of converting multiple documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
|
||||
For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
|
||||
|
||||
From a local repo clone, you can run it with:
|
||||
|
||||
|
@@ -125,7 +125,7 @@ class ConvertedDocument(BaseModel):
|
||||
desc = DsDocumentDescription(logs=[])
|
||||
|
||||
page_hashes = [
|
||||
PageReference(hash=p.page_hash, page=p.page_no, model="default")
|
||||
PageReference(hash=p.page_hash, page=p.page_no + 1, model="default")
|
||||
for p in self.pages
|
||||
]
|
||||
|
||||
@@ -159,7 +159,7 @@ class ConvertedDocument(BaseModel):
|
||||
prov=[
|
||||
Prov(
|
||||
bbox=target_bbox,
|
||||
page=element.page_no,
|
||||
page=element.page_no + 1,
|
||||
span=[0, len(element.text)],
|
||||
)
|
||||
],
|
||||
@@ -242,7 +242,7 @@ class ConvertedDocument(BaseModel):
|
||||
prov=[
|
||||
Prov(
|
||||
bbox=target_bbox,
|
||||
page=element.page_no,
|
||||
page=element.page_no + 1,
|
||||
span=[0, 0],
|
||||
)
|
||||
],
|
||||
@@ -264,7 +264,7 @@ class ConvertedDocument(BaseModel):
|
||||
prov=[
|
||||
Prov(
|
||||
bbox=target_bbox,
|
||||
page=element.page_no,
|
||||
page=element.page_no + 1,
|
||||
span=[0, 0],
|
||||
)
|
||||
],
|
||||
@@ -274,7 +274,7 @@ class ConvertedDocument(BaseModel):
|
||||
)
|
||||
|
||||
page_dimensions = [
|
||||
PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width)
|
||||
PageDimensions(page=p.page_no + 1, height=p.size.height, width=p.size.width)
|
||||
for p in self.pages
|
||||
]
|
||||
|
||||
|
8
poetry.lock
generated
8
poetry.lock
generated
@@ -715,13 +715,13 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "docling-core"
|
||||
version = "1.1.0"
|
||||
version = "1.1.2"
|
||||
description = "A python library to define and validate data types in Docling."
|
||||
optional = false
|
||||
python-versions = "<4.0,>=3.9"
|
||||
files = [
|
||||
{file = "docling_core-1.1.0-py3-none-any.whl", hash = "sha256:80096ec6bbce9e616700ccd6bdd5a50e5d1a9a832d7968da3874d54b29962536"},
|
||||
{file = "docling_core-1.1.0.tar.gz", hash = "sha256:69bc83d3b192d9e56bb91d77d8434d9fc109f8cb25ab5a285d2f3bccc10899cb"},
|
||||
{file = "docling_core-1.1.2-py3-none-any.whl", hash = "sha256:bdff5643e3e37a24204449eee99505db0f1cf620b8e1ce4cf4b71850bf49496b"},
|
||||
{file = "docling_core-1.1.2.tar.gz", hash = "sha256:969cde6795631a5f5f8cbb5e7ca0e4032864c1abc8fff762415a09a9b1f7146c"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
@@ -4882,4 +4882,4 @@ ocr = ["easyocr"]
|
||||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "dcb00c6601f61b087fd204d040149c20a7dcd72ab353e912e78dc265c86e4d00"
|
||||
content-hash = "a6685d5cf1b283d805e10193a437662a1807f99dad40b56ab1e58e1b708fc184"
|
||||
|
@@ -23,7 +23,7 @@ packages = [{include = "docling"}]
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
pydantic = "^2.0.0"
|
||||
docling-core = "^1.1.0"
|
||||
docling-core = "^1.1.2"
|
||||
docling-ibm-models = "^1.1.0"
|
||||
deepsearch-glm = ">=0.19.0,<1"
|
||||
filetype = "^1.2.0"
|
||||
|
Loading…
Reference in New Issue
Block a user