fix: bumped the glm version and adjusted the tests (#83)

* bumped the glm version and adjusted the tests

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* updated the poetry lock

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fix hooks

  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>

* fixed the tests

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* reformatted the code

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added the tests for tables

  Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Co-authored-by: Michele Dolfi <dol@zurich.ibm.com>
2024-09-18 07:43:49 +02:00
parent 8242bce4fa
commit 442443a102
11 changed files with 406 additions and 361 deletions
@@ -25,7 +25,7 @@ python = "^3.10"
 pydantic = "^2.0.0"
 docling-core = "^1.3.0"
 docling-ibm-models = "^1.2.0"
-deepsearch-glm = "^0.21.0"
+deepsearch-glm = "^0.21.1"
 filetype = "^1.2.0"
 pypdfium2 = "^4.30.0"
 pydantic-settings = "^2.3.0"
@@ -96,10 +96,17 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument):
        for i, row in enumerate(true_item.data):
            for j, col in enumerate(true_item.data[i]):
                # print("true: ", true_item.data[i][j])
                # print("pred: ", pred_item.data[i][j])
                assert (
                    true_item.data[i][j].text == pred_item.data[i][j].text
                ), "table-cell does not have the same text"
                assert (
                    true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type
                ), "table-cell does not have the same type"
    return True
@@ -156,9 +163,13 @@ def verify_conversion_result(
        ), f"Mismatch in PDF cell prediction for {input_path}"
        # assert verify_output(
-        #     doc_pred, doc_true
+        #    doc_pred, doc_true
        # ), f"Mismatch in JSON prediction for {input_path}"
        assert verify_tables(
            doc_pred, doc_true
        ), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}"
        assert verify_md(
            doc_pred_md, doc_true_md
        ), f"Mismatch in Markdown prediction for {input_path}"