fix: set page number using 1-based indexing (#22)
Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
parent
e102827753
commit
d2d9543415
@@ -56,7 +56,7 @@ print(doc.export_to_markdown()) # output: "## DocLayNet: A Large Human-Annotate
|
|||||||
|
|
||||||
### Convert a batch of documents
|
### Convert a batch of documents
|
||||||
|
|
||||||
For an example of converting multiple documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
|
For an example of batch-converting documents, see [convert.py](https://github.com/DS4SD/docling/blob/main/examples/convert.py).
|
||||||
|
|
||||||
From a local repo clone, you can run it with:
|
From a local repo clone, you can run it with:
|
||||||
|
|
||||||
|
@@ -125,7 +125,7 @@ class ConvertedDocument(BaseModel):
|
|||||||
desc = DsDocumentDescription(logs=[])
|
desc = DsDocumentDescription(logs=[])
|
||||||
|
|
||||||
page_hashes = [
|
page_hashes = [
|
||||||
PageReference(hash=p.page_hash, page=p.page_no, model="default")
|
PageReference(hash=p.page_hash, page=p.page_no + 1, model="default")
|
||||||
for p in self.pages
|
for p in self.pages
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -159,7 +159,7 @@ class ConvertedDocument(BaseModel):
|
|||||||
prov=[
|
prov=[
|
||||||
Prov(
|
Prov(
|
||||||
bbox=target_bbox,
|
bbox=target_bbox,
|
||||||
page=element.page_no,
|
page=element.page_no + 1,
|
||||||
span=[0, len(element.text)],
|
span=[0, len(element.text)],
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@@ -242,7 +242,7 @@ class ConvertedDocument(BaseModel):
|
|||||||
prov=[
|
prov=[
|
||||||
Prov(
|
Prov(
|
||||||
bbox=target_bbox,
|
bbox=target_bbox,
|
||||||
page=element.page_no,
|
page=element.page_no + 1,
|
||||||
span=[0, 0],
|
span=[0, 0],
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@@ -264,7 +264,7 @@ class ConvertedDocument(BaseModel):
|
|||||||
prov=[
|
prov=[
|
||||||
Prov(
|
Prov(
|
||||||
bbox=target_bbox,
|
bbox=target_bbox,
|
||||||
page=element.page_no,
|
page=element.page_no + 1,
|
||||||
span=[0, 0],
|
span=[0, 0],
|
||||||
)
|
)
|
||||||
],
|
],
|
||||||
@@ -274,7 +274,7 @@ class ConvertedDocument(BaseModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
page_dimensions = [
|
page_dimensions = [
|
||||||
PageDimensions(page=p.page_no, height=p.size.height, width=p.size.width)
|
PageDimensions(page=p.page_no + 1, height=p.size.height, width=p.size.width)
|
||||||
for p in self.pages
|
for p in self.pages
|
||||||
]
|
]
|
||||||
|
|
||||||
|
8
poetry.lock
generated
8
poetry.lock
generated
@@ -715,13 +715,13 @@ files = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-core"
|
name = "docling-core"
|
||||||
version = "1.1.0"
|
version = "1.1.2"
|
||||||
description = "A python library to define and validate data types in Docling."
|
description = "A python library to define and validate data types in Docling."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.9"
|
python-versions = "<4.0,>=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "docling_core-1.1.0-py3-none-any.whl", hash = "sha256:80096ec6bbce9e616700ccd6bdd5a50e5d1a9a832d7968da3874d54b29962536"},
|
{file = "docling_core-1.1.2-py3-none-any.whl", hash = "sha256:bdff5643e3e37a24204449eee99505db0f1cf620b8e1ce4cf4b71850bf49496b"},
|
||||||
{file = "docling_core-1.1.0.tar.gz", hash = "sha256:69bc83d3b192d9e56bb91d77d8434d9fc109f8cb25ab5a285d2f3bccc10899cb"},
|
{file = "docling_core-1.1.2.tar.gz", hash = "sha256:969cde6795631a5f5f8cbb5e7ca0e4032864c1abc8fff762415a09a9b1f7146c"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@@ -4882,4 +4882,4 @@ ocr = ["easyocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "dcb00c6601f61b087fd204d040149c20a7dcd72ab353e912e78dc265c86e4d00"
|
content-hash = "a6685d5cf1b283d805e10193a437662a1807f99dad40b56ab1e58e1b708fc184"
|
||||||
|
@@ -23,7 +23,7 @@ packages = [{include = "docling"}]
|
|||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = "^1.1.0"
|
docling-core = "^1.1.2"
|
||||||
docling-ibm-models = "^1.1.0"
|
docling-ibm-models = "^1.1.0"
|
||||||
deepsearch-glm = ">=0.19.0,<1"
|
deepsearch-glm = ">=0.19.0,<1"
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
|
Loading…
Reference in New Issue
Block a user