fix: bumped the glm version and adjusted the tests (#83)
* bumped the glm version and adjusted the tests
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* updated the poetry lock
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* fix hooks
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* fixed the tests
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* reformatted the code
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>
* added the tests for tables
  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
8242bce4fa
commit
442443a102
736
poetry.lock
generated
736
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -25,7 +25,7 @@ python = "^3.10"
|
|||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = "^1.3.0"
|
docling-core = "^1.3.0"
|
||||||
docling-ibm-models = "^1.2.0"
|
docling-ibm-models = "^1.2.0"
|
||||||
deepsearch-glm = "^0.21.0"
|
deepsearch-glm = "^0.21.1"
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypdfium2 = "^4.30.0"
|
pypdfium2 = "^4.30.0"
|
||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -96,10 +96,17 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
|
|||||||
for i, row in enumerate(true_item.data):
|
for i, row in enumerate(true_item.data):
|
||||||
for j, col in enumerate(true_item.data[i]):
|
for j, col in enumerate(true_item.data[i]):
|
||||||
|
|
||||||
|
# print("true: ", true_item.data[i][j])
|
||||||
|
# print("pred: ", pred_item.data[i][j])
|
||||||
|
|
||||||
assert (
|
assert (
|
||||||
true_item.data[i][j].text == pred_item.data[i][j].text
|
true_item.data[i][j].text == pred_item.data[i][j].text
|
||||||
), "table-cell does not have the same text"
|
), "table-cell does not have the same text"
|
||||||
|
|
||||||
|
assert (
|
||||||
|
true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type
|
||||||
|
), "table-cell does not have the same type"
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
@@ -156,9 +163,13 @@ def verify_conversion_result(
|
|||||||
), f"Mismatch in PDF cell prediction for {input_path}"
|
), f"Mismatch in PDF cell prediction for {input_path}"
|
||||||
|
|
||||||
# assert verify_output(
|
# assert verify_output(
|
||||||
# doc_pred, doc_true
|
# doc_pred, doc_true
|
||||||
# ), f"Mismatch in JSON prediction for {input_path}"
|
# ), f"Mismatch in JSON prediction for {input_path}"
|
||||||
|
|
||||||
|
assert verify_tables(
|
||||||
|
doc_pred, doc_true
|
||||||
|
), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
|
||||||
|
|
||||||
assert verify_md(
|
assert verify_md(
|
||||||
doc_pred_md, doc_true_md
|
doc_pred_md, doc_true_md
|
||||||
), f"Mismatch in Markdown prediction for {input_path}"
|
), f"Mismatch in Markdown prediction for {input_path}"
|
||||||
|
Loading…
Reference in New Issue
Block a user