fix(tests): Adjust the test data to match the new version of LayoutPredictor (#82)

* fix(tests): Adjust the test data to match the new version of LayoutPredictor from docling-ibm-models

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

* chore: Update poetry to use `docling-ibm-models` at version `v1.2.0`

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>

---------

Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
This commit is contained in:
Nikos Livathinos 2024-09-17 15:50:35 +02:00 committed by GitHub
parent 30a0ef69b4
commit fa9699fa3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 16 additions and 11 deletions

15
poetry.lock generated
View File

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
[[package]]
name = "aiohappyeyeballs"
@ -977,13 +977,13 @@ tabulate = ">=0.9.0,<0.10.0"
[[package]]
name = "docling-ibm-models"
version = "1.1.7"
version = "1.2.0"
description = "This package contains the AI models used by the Docling PDF conversion package"
optional = false
python-versions = "<4.0,>=3.10"
files = [
{file = "docling_ibm_models-1.1.7-py3-none-any.whl", hash = "sha256:a118c9a3b7cdcd6ee6190f6475cb56ca1a1a52f7e6632932cf9d5de5217098aa"},
{file = "docling_ibm_models-1.1.7.tar.gz", hash = "sha256:d4699f24e32c314ecd6fe89e9067f1abe8808f83dd9c13d05d423361e92edc7d"},
{file = "docling_ibm_models-1.2.0-py3-none-any.whl", hash = "sha256:1bad8fb67ab1ff71a6120530c76272e48b71c5829383d381927e8e51c2204eee"},
{file = "docling_ibm_models-1.2.0.tar.gz", hash = "sha256:e5558c66433603a7acfe0dd9e7bc12e99680af9484b26cf3e61e03b1cbdd3e2d"},
]
[package.dependencies]
@ -6610,11 +6610,6 @@ files = [
{file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"},
{file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"},
{file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"},
{file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"},
{file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"},
{file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"},
{file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"},
{file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"},
]
[package.dependencies]
@ -7228,4 +7223,4 @@ examples = ["langchain-huggingface", "langchain-milvus", "langchain-text-splitte
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "f50b5f6158b688cc25f80253e3cec8e60d852d66a90fe8eb96798ea3c2372019"
content-hash = "7a7b6ef730f468cc3d1c3054c6638362d5e50cfef48d6e76ea3a4fe534dd1ccd"

View File

@ -24,7 +24,7 @@ packages = [{include = "docling"}]
python = "^3.10"
pydantic = "^2.0.0"
docling-core = "^1.3.0"
docling-ibm-models = "^1.1.7"
docling-ibm-models = "^1.2.0"
deepsearch-glm = "^0.21.0"
filetype = "^1.2.0"
pypdfium2 = "^4.30.0"

View File

@ -1776,6 +1776,10 @@ An important design and implementation consideration is the fact that RCAC colum
An example of this situation is shown in Figure 6-1. However, note that aggregate functions (a form of grouping) are based on masked values.
SELECT
FROM GROUP BY ORDER BY
## Without RCAC Masking
## With RCAC Masking
@ -1808,6 +1812,12 @@ Figure 6-1 Timing of column masking
| **** **** **** 1234 | 750.33 |
| **** **** **** 0001 | 10.00 |
CREDIT_CARD_NUMBER, SUM(AMOUNT) AS TOTAL TRANSACTIONS
CREDIT_CARD_NUMBER
CREDIT_CARD_NUMBER;
Conversely, field procedure masking causes the column values to be changed (that is, masked) and stored in the row. When the table is queried and the masked columns are referenced, the masked data is used for any local selection, joining, grouping, or ordering operations. This situation can have a profound effect on the query's final result set and not just on the column values that are returned. Field procedure masking occurs when the column values are read from disk before any query processing. RCAC masking occurs when the column values are returned to the application after query processing. This difference in behavior is shown in Figure 6-2.
Note: Column masks can influence an SQL INSERT or UPDATE. For example, you cannot insert or update a table with column access control activated with masked data generated from an expression within the same statement that is based on a column with a column mask.