From 12dab0a1e8d181d99e4711ffdbbc33d158234fb4 Mon Sep 17 00:00:00 2001 From: Elwin <61868295+hzhaoy@users.noreply.github.com> Date: Wed, 14 May 2025 15:47:28 +0800 Subject: [PATCH] feat: support image/webp file type (#1415) * support image/webp file type Signed-off-by: Elwin <61868295+hzhaoy@users.noreply.github.com> Signed-off-by: Elwin * docs: add webp image format in supported_formats.md Signed-off-by: Elwin <61868295+hzhaoy@users.noreply.github.com> Signed-off-by: Elwin * test: add a test case for `image/webp` file Signed-off-by: Elwin * style: apply styling Signed-off-by: Elwin * test: update test case of converting `image/webp` file with more ocr engines Signed-off-by: Elwin * style: apply styling Signed-off-by: Elwin * rename test file Signed-off-by: Michele Dolfi --------- Signed-off-by: Elwin <61868295+hzhaoy@users.noreply.github.com> Signed-off-by: Elwin Signed-off-by: Michele Dolfi Co-authored-by: Michele Dolfi --- docling/datamodel/base_models.py | 1 + docs/usage/supported_formats.md | 2 +- .../docling_v2/webp-test.doctags.txt | 2 + .../groundtruth/docling_v2/webp-test.json | 1 + .../webp/groundtruth/docling_v2/webp-test.md | 1 + .../docling_v2/webp-test.pages.json | 1 + tests/data/webp/webp-test.webp | Bin 0 -> 29684 bytes tests/test_backend_webp.py | 82 ++++++++++++++++++ tests/verify_utils.py | 2 +- 9 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt create mode 100644 tests/data/webp/groundtruth/docling_v2/webp-test.json create mode 100644 tests/data/webp/groundtruth/docling_v2/webp-test.md create mode 100644 tests/data/webp/groundtruth/docling_v2/webp-test.pages.json create mode 100644 tests/data/webp/webp-test.webp create mode 100644 tests/test_backend_webp.py diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index 95dcfe7..dbf9366 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -90,6 +90,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = { "image/tiff", "image/gif", "image/bmp", + "image/webp", ], InputFormat.PDF: ["application/pdf"], InputFormat.ASCIIDOC: ["text/asciidoc"], diff --git a/docs/usage/supported_formats.md b/docs/usage/supported_formats.md index 4d1ca4f..c38e7ff 100644 --- a/docs/usage/supported_formats.md +++ b/docs/usage/supported_formats.md @@ -14,7 +14,7 @@ Below you can find a listing of all supported input and output formats. | AsciiDoc | | | HTML, XHTML | | | CSV | | -| PNG, JPEG, TIFF, BMP | Image formats | +| PNG, JPEG, TIFF, BMP, WEBP | Image formats | Schema-specific support: diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt new file mode 100644 index 0000000..99ea552 --- /dev/null +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.doctags.txt @@ -0,0 +1,2 @@ +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package + \ No newline at end of file diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.json b/tests/data/webp/groundtruth/docling_v2/webp-test.json new file mode 100644 index 0000000..789cc58 --- /dev/null +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.json @@ -0,0 +1 @@ +{"schema_name": "DoclingDocument", "version": "1.3.0", "name": "ocr_test", "origin": {"mimetype": "application/pdf", "binary_hash": 14853448746796404529, "filename": "ocr_test.pdf", "uri": null}, "furniture": {"self_ref": "#/furniture", "parent": null, "children": [], "content_layer": "furniture", "name": "_root_", "label": "unspecified"}, "body": {"self_ref": "#/body", "parent": null, "children": [{"cref": "#/texts/0"}], "content_layer": "body", "name": "_root_", "label": "unspecified"}, "groups": [], "texts": [{"self_ref": "#/texts/0", "parent": {"cref": "#/body"}, "children": [], "content_layer": "body", "label": "text", "prov": [{"page_no": 1, "bbox": {"l": 69.0, "t": 767.2550252278646, "r": 506.6666666666667, "b": 688.5883585611979, "coord_origin": "BOTTOMLEFT"}, "charspan": [0, 94]}], "orig": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "formatting": null, "hyperlink": null}], "pictures": [], "tables": [], "key_value_items": [], "form_items": [], "pages": {"1": {"size": {"width": 595.201171875, "height": 841.9216918945312}, "image": null, "page_no": 1}}} \ No newline at end of file diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.md b/tests/data/webp/groundtruth/docling_v2/webp-test.md new file mode 100644 index 0000000..4289654 --- /dev/null +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.md @@ -0,0 +1 @@ +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file diff --git a/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json new file mode 100644 index 0000000..60fc699 --- /dev/null +++ b/tests/data/webp/groundtruth/docling_v2/webp-test.pages.json @@ -0,0 +1 @@ +[{"page_no": 0, "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.33333333333333, "r_y0": 99.33333333333333, "r_x1": 506.6666666666667, "r_y1": 99.33333333333333, "r_x2": 506.6666666666667, "r_y2": 74.66666666666667, "r_x3": 71.33333333333333, "r_y3": 74.66666666666667, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 0.9555703127793324, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.0, "r_y0": 126.66666666666667, "r_x1": 506.6666666666667, "r_y1": 126.66666666666667, "r_x2": 506.6666666666667, "r_y2": 100.66666666666667, "r_x3": 69.0, "r_y3": 100.66666666666667, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 0.9741098171752292, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 70.66666666666667, "r_y0": 153.33333333333334, "r_x1": 154.0, "r_y1": 153.33333333333334, "r_x2": 154.0, "r_y2": 128.66666666666666, "r_x3": 70.66666666666667, "r_y3": 128.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 0.6702765056141881, "from_ocr": true}], "parsed_page": null, "predictions": {"layout": {"clusters": [{"id": 0, "label": "text", "bbox": {"l": 69.0, "t": 74.66666666666667, "r": 506.6666666666667, "b": 153.33333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.33333333333333, "r_y0": 99.33333333333333, "r_x1": 506.6666666666667, "r_y1": 99.33333333333333, "r_x2": 506.6666666666667, "r_y2": 74.66666666666667, "r_x3": 71.33333333333333, "r_y3": 74.66666666666667, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 0.9555703127793324, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.0, "r_y0": 126.66666666666667, "r_x1": 506.6666666666667, "r_y1": 126.66666666666667, "r_x2": 506.6666666666667, "r_y2": 100.66666666666667, "r_x3": 69.0, "r_y3": 100.66666666666667, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 0.9741098171752292, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 70.66666666666667, "r_y0": 153.33333333333334, "r_x1": 154.0, "r_y1": 153.33333333333334, "r_x2": 154.0, "r_y2": 128.66666666666666, "r_x3": 70.66666666666667, "r_y3": 128.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 0.6702765056141881, "from_ocr": true}], "children": []}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null, "vlm_response": null}, "assembled": {"elements": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.0, "t": 74.66666666666667, "r": 506.6666666666667, "b": 153.33333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.33333333333333, "r_y0": 99.33333333333333, "r_x1": 506.6666666666667, "r_y1": 99.33333333333333, "r_x2": 506.6666666666667, "r_y2": 74.66666666666667, "r_x3": 71.33333333333333, "r_y3": 74.66666666666667, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 0.9555703127793324, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.0, "r_y0": 126.66666666666667, "r_x1": 506.6666666666667, "r_y1": 126.66666666666667, "r_x2": 506.6666666666667, "r_y2": 100.66666666666667, "r_x3": 69.0, "r_y3": 100.66666666666667, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 0.9741098171752292, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 70.66666666666667, "r_y0": 153.33333333333334, "r_x1": 154.0, "r_y1": 153.33333333333334, "r_x2": 154.0, "r_y2": 128.66666666666666, "r_x3": 70.66666666666667, "r_y3": 128.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 0.6702765056141881, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "text", "bbox": {"l": 69.0, "t": 74.66666666666667, "r": 506.6666666666667, "b": 153.33333333333334, "coord_origin": "TOPLEFT"}, "confidence": 0.9715733528137207, "cells": [{"index": 0, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 71.33333333333333, "r_y0": 99.33333333333333, "r_x1": 506.6666666666667, "r_y1": 99.33333333333333, "r_x2": 506.6666666666667, "r_y2": 74.66666666666667, "r_x3": 71.33333333333333, "r_y3": 74.66666666666667, "coord_origin": "TOPLEFT"}, "text": "Docling bundles PDF document conversion to", "orig": "Docling bundles PDF document conversion to", "text_direction": "left_to_right", "confidence": 0.9555703127793324, "from_ocr": true}, {"index": 1, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 69.0, "r_y0": 126.66666666666667, "r_x1": 506.6666666666667, "r_y1": 126.66666666666667, "r_x2": 506.6666666666667, "r_y2": 100.66666666666667, "r_x3": 69.0, "r_y3": 100.66666666666667, "coord_origin": "TOPLEFT"}, "text": "JSON and Markdown in an easy self contained", "orig": "JSON and Markdown in an easy self contained", "text_direction": "left_to_right", "confidence": 0.9741098171752292, "from_ocr": true}, {"index": 2, "rgba": {"r": 0, "g": 0, "b": 0, "a": 255}, "rect": {"r_x0": 70.66666666666667, "r_y0": 153.33333333333334, "r_x1": 154.0, "r_y1": 153.33333333333334, "r_x2": 154.0, "r_y2": 128.66666666666666, "r_x3": 70.66666666666667, "r_y3": 128.66666666666666, "coord_origin": "TOPLEFT"}, "text": "package", "orig": "package", "text_direction": "left_to_right", "confidence": 0.6702765056141881, "from_ocr": true}], "children": []}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}] \ No newline at end of file diff --git a/tests/data/webp/webp-test.webp b/tests/data/webp/webp-test.webp new file mode 100644 index 0000000000000000000000000000000000000000..45bc1bcd8a20ad803dd2904fe4a677e490150704 GIT binary patch literal 29684 zcmeFVQnA$e7K4uZXZzU5)Tasa93z+s{J5+n*`N{WjL37jM->X9HV zZ0}7p$n>uJDd6|;e#UpRcH{ZG{6fAGZpA<8pXu-Y9()i#!_N6f{TzOu1|n z=A+D=;`N&yw2?Bp8>{8=s)Z=&6>mIe7;yF_jmdmg1=et|Fsb8s!{AHoL2{@ zdTwik8NZ$aCyT7!(idKb)7*JjD3R!T8gsZfmxzHa#Lz|A@bn|lwWG{wGoJSF4%UQ? z^H2TyWtSZ*CHn8IIe#$m3qalxA|G$sc}tthr#%$5eHQC+ZPTs9v&7McK%gvew#aJW0dRxbt zM;-VdQ@ggw9E9XoD;r4la|CbmgyQji@pD2_Q>oJ~+144^-(}0yp2tj}s(wYFMok(h zVYb(SOzd`9UjrCr+VL*;bI=NF^rMWwdu4e>=!ez_Sx3_Uu?!yRo(v+$EY~o0>e9EP zb?9j+V#J;ywB)X^<|#kl3&bvpHiG>s5f z-vA2nnunn{kBhCfED-z)lNU!O-{Zw>UgDY1SiV-8brq9~JS#08;gONH^W0xWTc0EbqS_I4r| z3}H_f{-SFxXg$VJ*ikg9689((&{1a;L4K3l=r@m;-tbD!Zt1>H9$YYS^Fac3hl-p` z#~4G-+?xY;{80IYV-4xypeZXB*w|BJHqTqppYW)=2i$nR<53C1GM*qA@yQ&=#r<0; z#rIX^>-R-8)vSO&%2E!H*bzegBe!y42u}S_wsk^$JZY|GzoqN)fj37q98BSlZKVj* z0$&k*)aK;!d53Uo377NB0eNU5jx!N8AkOc$Vzpy+?a=BD^~pLH?tNrPSWUu>=WPC- z4+hcv;+(xZn7)f^+QBoX{kg*)A_r01hK)v|7F~f3=@bhZ1a`wmoC?5S+y?I+m z32N@-I{|*4Hi!ZAL30G^NWecV|4S$H)28C0N7p(n+2IZ09Y{DBUi(*wb=eNV`i$@@ ze}0AY#@>oFC>>``6e-cs$mffIo3v(w^2cN>DyzeUmvIo4zn#$1 z8=UBeX;4(&;)T*@J`GC32%QBeJsDQ+YfvAhT_HVqSH|v3^``GnovbP`(%8oMJ3xm| zb)<}&ej+W+$Q>ojrhdkKIHvCzr9$X(F(zZ23-S0GDk2O7q6r-BjUj%d97^7bp=9Kgk>KUqn9#$VLIsGoPj4NlvkF8y_W^6P7|U);pa_OZj~|iQ5D4&zt@NnpKH@2)aqP zar7u>971a1w^Ig(`3f0HL--fgFI|8pD zRY`Oq{t2s%@WZowF=t-%sMWR1Q&4#vjC>93Ekndo4$%|-Kk=0yA+})TKJ<;?HOM<%0@9`f_`R`OZ?8{HdVG@T|T%8LHzuyBQd_=XE*MyhBGX^StBBq1mp{My-9~(KT zj>-^0)@P&D85<=RuS04wO3^FuVfy0$BX1Vv#**tyMJ;0@r)H+ z!iD-am-Qs*o@ChXHWV#~F{M^X8JuCAokEOx@;fWLwJ&?lJxlHH|f? zKq8#dJ;-j}K7LZ@A~!Ct=4WKRpz2Kr?_T^Lx3e3X)=(IpuC9!C0|QVAKo?VOo2PKu zX7ahWaVYtuYTl?q28aXt?i{1iA6N@^#g>C)kL+9(*yq%+X8F8R#rUL`8V8~TpA);s zx4F>4SF-{vtc-Dg!PZQ|hpJB{6dG#%I4w%&iiGE$N56Cp6l}}~Ru`?>x{`^dc?FTl zc(zgwOv^cfOt`PYD^M``hNH!R3*xq95?tOK+$!LKn&}_#7D+>X_#tQ7nrOKR5R*f%1e(BMX0?p46!h1WQEC zE^|6l03~o$70dKP7WA1-7r9mCk1x?U5XX8NizZUmS z&~t17T;}(b>rX&D(#S~rZciSZy1rVEbU9i-0>KGMm%~)b&clY!IP)hC?+rp<-%!(e zEJkiM2;eY~guF#7z)B7L4aBa?rx0={mUctxurvd3!F#aUx7$n-k(+V&M` zPXSWE$||}G#Q45shF)Fw9J69|HgikYRf`*lL_5|H{lUQWjn{d^b3}Znt??UJInf@9 zc7buLg#fj55|GL{)#-4RPzhpl{*~1gO}gxjph3H4fXT=Un#$uye>$$E(nf*a`{xu~ zrQ;^(+6<&Ii27IykmmDaU?kRW0z&VakiHCTK=_LAWSzD)CFyPxnh@80q{W_>&*ykL zInTwb;<y9$^|GfTP6Y+r|rdmwe1(^9Z6L^zQGN3MtegNcjX`LzW~0 zt2iiec_<+`&;8y!I%hP@h42AzJ{Oh}0=vypa>*1EfkA&t5(RlGDsY6B_N4>E6wc$h z?C+Kl_bL&CY&;m+VR$bT+i+%=UNg7pVzQLIe~$iROz>_z(4NYg8Fn_;$phE zRWY&y;XRH`uOzli_8i19`7iWcEPek4)f;*FU+zoUSM2jMiEux5?xA~U1vFqT{GLLr zAiO^pdn2)d0moN4)rNhisavnETxm=*r~NDKmRUPf+17gC>(0mxbQ{>{tSjxjsSQ`E zDDLjN5-YjxH1Yl)+xu4${a;o-smcA6jsEe6-Aks^IKf=Tf~gwC z`v3`;ok#NC;0}83?c!DBa@7Q0(;$c~tX5t(2yEX7Yszb1&A}8uV{e|jsMspuz&HgPW z|IeSlp!k9RKXc~a!}|Z+;{S4ke|PzR5%T}mKK|G4>2Hw#hVHN8{+)K?Z94o` zrSD(*{;e7Q)(n4X_?L!%Y513he`)xahJR`Jmxh07_?L!%Y513he`)xahJR`Jmxh07 z_?L$N|Dj>l9>Dj{PBXxNbuj4yy&@b!eHxI5fke-H%XA%iM*Pr(XlxbIr#1*iAF%Ll zAX9<#+1y)(p!-UnFsoRR+ zp;rV5>x)J6yD4j`8FQ?EHepuUD-!_ipbKjN3ZsU+CA4#J4&b&zPpQj5Hv2%Ly+*i3;>p3;&n)V*Sdjxj&<*d zhW%9wtipTFSMU+NM7M-i`1}B|*t`rFWPz)Q(@Rc(|EWUl2GqC;5)G+-!>=G%ssu3Q zS2E7vF$L@6fkfaW+d!GA!Sk3`>$&aDgYbifl3EXYBM~V{EG86g+YnbaT=N14rXA-UsqrW% z#DGynaF<#)kPkuL2h`x$gaaX-w^qqxASj0k-A&WjJd4Ofy|TLZ1NKpO*s$TS8E$hY zZA~NQ56c-O;>Spk7ng?R3~b*L%8Knc)sY$t$Ld_S?m3$L(XY#+)w^^9;q|Xg%KIcp zh~?I3f&_mfZ;Uat#i6MUR{Oc0PS%L|+ePk}Of)()|7y%naxcW0qv!9vG>Eli-d6i> z9!Laoha+N7E97e+Cl#&Rv>j)X(=7z*q!fQ3(<(7T#SrsN{cJP?GH!sQ;d07yaYe}Q zsYjznFJ&kf_QR@o!bFWshP&APiCZaBol(e!lCX^iu6YW+!T{)53xT+s9;z1sx4Lh4 za|cwx<#n!oWbx;r-<4`^ZrMnUG5dp7pg3Lk{Q9(ra9AHkwlg-Fh$2pllT2O!J z)e+MT9v>3%kWCs>>tr;)pDr~z`cA)4c6Ae;1}`l85`Z<4!g(QY6`vDH?#-cOzOzSO z#ny+LNL--@1`?b${ic+CSRDxY{P_Vw^IzC&SXfk=VUW`x$Q5orQ_|XC(Y*7u&cp{9 zm=s6i(6?hrw3#h`$`i(lX^(jBuZ19y3rhp@bP8d42VkIz2e~QTE)Ww0p-&$=#stcG z_u=9vqIW(Z4qO@crRyZ*FbRv`Z5I&MAo3)J~Pm7V+{^#!|};@DyurbCD7ZT!f0 z8dD9GHeV7cIu~wJt6|fzp0f=T#^=4`hA3~jXu)bD;tp|}6>|A>ADf?kGowD4@)oa%bGFk<@5dO~5eJs!$qP4d};ce=kqsF~&tb z`UmEI7o=TE^p*K(lx_72Dp%#U+Hr0z4!xMCt*Z4A3fa~gP1Msb6ef7JI{k6w`PUI` zKfT_=HroBS)#!ExOdyB;Z&b#NpqX~2B?L|yJHtf(g>q^VVb+e#sVVl0GSu&3AEvr6 zY#Uh@AS1>gt|pHsEA8y<#kRnPDgCyFXNy>CDVKV*&mH(2hJ6UbnPC94W$)@l@?Cn! zBrYTttk$z4cto!A)oqA%*nzW}vy~djJ^OUpp%Q}Dj6C5Gn{Y;<15a;tSzS->O2cKC z2uGv2M4X8cJDWDVBd~h5jlinu4pAs704sk2g$htXwVE9OXq)j+JN#Jcds$3BL3k*| z%^@LkNm*)hmJNE39vM*hrZ+F;P$2FkMb|4m={{TXOmP>vwqavtK1a_9HA$Dws0s5W zQulQWr}Qw)wTdX*rQJg<*OK6S@q#;6d3FKkz|W(aY^tr&xz|kg`(__Mf-@dj%@SLa zGk95_VsJb84Rx}IYnuT}!|th|`3E5w%5M?y$S^=Z(3DZ-5B&6Q8n*mN%X<%7fn03>f1sN}X=~mTL!t zAu|ZFF~Xf~EWXiAaB2y`7WH+^Z3$?F!_TcAcMx&X`OWWk##^h?>+#D6B77y;McDuL zN3X3VZhBxrQzxOcR*%SI&YWvY;_~?y6gs8?bbGKDU>rb>jXks+X!EBT7xfXarL+|} za1WTL>R??QKE{tN8J9w0q_$%66XY}BG>(FNrs>;(2n|2L3UIOxNb<>Ks+RJ{DBgDk zre9NNxMIGoAI~OlSc!;Zc*L%J=MKL7e)C7nmLO{FvL?R>kQngGkqtQyAfo*jT;9$K z!oC(t$;c($<#(rGT`~e$tR>k;BiKw>H`2BI^3`nFKdjspAJ5A(!oAR2{fN}Uwq3k4 zWN`UWJ_H*%X4W&G1bBsVF^M&3*%Es*r^eKkIycb`Obw9ZE#W9T1lC?Bu`N=lgU)S4 zGz$`jOnHYp(p`=r)7d$x%4&KE31$w{Z?&+3e3USlLcY)Y0NCGvVs^ZfezZS?nFVQU6L= z?$O@9&1j{!J|R!&I%NAcUcrAoJU;I}cj(NqUl)jkDHgq8B`MPGwSDemR$$kQD(G62 zz!6m0d{r%Md9?XwQiT4!9x;hV^9XW#$thILFhGk9c|&TQJDJ_OE_L*oteMR>k#S28 z>tuh^$!4OoqxNxo8zG=31b!+6hxg|FwS3ay`+l)FG2nucXj^?9J5N4hdf{_I z45BO!Q<;1W)m@N^0^~8RI*$r)IEVUdr!r>ng+;__Crl?BbRA;P%?nQ;ke6#WZ^9TF zB_0eLNu{&?x`&;RcO>wY#i5chJJen_xfJ}ib;s38ABs{J*W}i{HiGX!Y3oQGY~Gos zNY|h9kXzqaUg*`LR}@ab@YemgXnfm&HFmlA>&-m|DypB<&A{L=#;O|rr>Sr3Y4O{k zD96+fYl|uxOxI^Dku`j8Yu1=I z`whwtKx1&}3$O{N9hD2rNktOu`8TOspFJP;d9^W>>(t?9;&Wz6kC2oa3VFB<4nQbp zU!@XYw76=fZykEl*=a=X_jxKb$NPXV*iv-hO)OYJ> zinxmjbGpB)y~O8PE`uad2CA#jJq4Ym1mq}T?P;^!u5vZ$fofitm#u@ggrj?t6Lex(wq1SGfGzMrjEmDtiz^JI2q&SQ{dkF}J;kzVx!YdVnMBuWE-C~k2l5D$F`i9t;Fz^l(HWucD2~b)W73Kv! z=u!a>q9;)IzMq-}An*ZN94m<#{eJ8hjm_^2Si25R5rIosvY%#Qe6tyZWl4sl>y;KyWOTzz<>* zSQ*zqjX!S-vn#qAM{HK8`oo8&O+5#-%rQpOP~m)g4X%G!U`0%aqzRFH%aujub{6Fc zQi^qp4Nezo;f@LzDClh=YY7QyhT~7P7T9x~r$&vyd;!gVgMzq5t}r~S_YKUo;$=C; zy;be*=Yhyt5?NvR)P%hSZe0OMFb<*c4w}Y!!;Wi)Ac~!0GCMdE>*gHa!|hqi|9)1D zCcx+=b@`g%Rs%|w2J9fM%pUL$iJF@*pVgu{xb^NT^q39lp^z}?{s91I8-v_zL-Ym}dRM+RCr$1qJB#*kh z8hf(>3F3Lb-4oK9W%Mom=23XP7(br)jR=u$+uot@T)r>KP`B?1j+SvmrtOat#CK2i zHhg|D>YAIBks(WruX36h%nwD%1>=W}nq-+xB zlqJV7OXNvrr)&dKvHSea+6Q7n9|kos-z{VmQ+EXq5Jc3u?M!y#o0MgM}9fD@~OmH6ylj5NnJs%@~+V!iw@5HHc=b5R7Pv2@4j_dwaH zs_SQi-hI==Brux+xkPaA3RbTirU83m$vM6pb>&+)YTJ2t1+N@w z1i_^+WUY_`*AH{dt?YXmKE~=`)JQ{7Ti?MPVzBZ#DL!DLvpdJ*@ss3q$5k;QS#h#B z;*^6dSB=^G4a&85?ripbP?KR0EnGz2j6Ry!fUb@XGOCPnq3x(IWTqM*5&6bRGhgVx>^3C`th3TX`FZqycUHFq0IeJ&mPD%k*%m$n? zb`;SbC>TI+dDyC%!XdAfVgkZw{bOUk>D6U68fS*ImRZLE_a!k*Kk%5sqF>;wmZ0L{ z`h^6sWi?nhq~g~5}0_`}YM~*fC!1YaU1GrBmB=82I5Fg4t3Z`vcMMF7s!Tr;DP{WhiWn%~_OIK1c zb>gBz5b>cWn#@xk_CW8lQ9zbqdm4kO5F)kNO><7}6)B+-r%)8(vw-G;IyCOEgAfZO zV9pi!y2$!loVs1Byao8sgd&))zDOU&b^s!!lJl$zN=a z!MTH-#+LM(hnaX0kl_6RV$*c9;*gyf%iNeyZ7~Sp0W9}{Ee*BLuO$!+4Eq|5>Q%4o zvL$fSuOe?p^Zuj~HF7=vNJr+M2Q&KnxJ`$-%1-;ljaB-ZKRjM(sUrdv-=UQE8HYN|3Um;$Z09f|pRq^L8)A#X&(ya$p5IDup@K#HU7m5$a$! zxfzR{moxdFSl=5WW3WoaJ6kD;rQXLCRimuyTc#&49PfT;5!)EM%DhDDo-eEO*2-&} z1M`Q1;XZD?1Q+gwr&2u&FKNXzE%hQq3%qrPb-Bs@=|*_;uux7^tvmo|miw~3J&?fEKTM}{i6qkzRa8oTEm38YRqdH~g4`PJ3aip}k?bg}@2ymXbz{ws zOFwfs{PQ&DZ*fjASko?PzxV^|ryOXLsvYDJtWv9~jceSZg(p^QNv#^pW!nkKT^_cd zaf&+JC84cDK6u)?*1zh~Kih`L3h7VKF&9DTNk)*_k31gk89ER>RYLdU{w9fzjRgIh z&shco4s-m&HFwLewQpCyZY_{2ScpGZ>1rEA$I&vt_lqx&O^OcLDc@tu?%*A5V%+T4 zBML`d$}AvaZ2!=@kgrOwEd8`t?r*SxK!oN6OkN`|T7 zg9Lf|Z~`Z=Vn(L!z2Bu2BWA&Rb4hExZf#IQrA@F+Nn9}q{*9x^$5Lk*Zh>5v+GHPy zWP^QKr%>je_|*D33-WMZx0UMltMfV7tI`9d44by@wB4m6Kb857L#rL`fCK9Fbfr1? z$xfV_P!9<2Im80OOYb6dJLziH`=OzvA$gNi2_X#0VmEIPx5}V1A~BQmWMoC?tiy@! z7g?ZR#)|cMfwK~U1P^5%{bvLIyVp^p=3Lr3#zlRExuShg6eIV&KPxH*`~faoo>i)y z_d-#iXcar=q+yh?Nre5-q|X`3R&Py;Cx=g(s%O&OuNQ}JQBkWE%8X9JAi%$R9$c_) zc*y+fNu_5M2>L6YbzYlheZy*`Gi+4~kyunX%b!j&9*3;b0|ViiOJ|~jjIbfQR%m;x zi|Os_JEvQ>skXOy@{HM3vls0T{wzf4Pz05`b{_ZidK+-ivwl#qvb6%>9Q5L9xbbCY z*qKE^&o9=4v=s;XKpW@VdOB{DS8oLye_ya8iRzvKUgO|0&Y2mejU0OvdZ?=8k4up6 z`p2$8t6k0Bi%TEe+&p0PhUk!yZSlDjg$<++WWG)tj0QI$Xhsd=d*gH`c}W;klnzFp z2RcEM0C0wPjXpXMWP~ylFe_`tU2x!y!1&clE#+~cwT4#|Y{M3JiNHFaiZADYN2wD( z_pzr);R6dAC%b9gfaoaNy+1r`=I2)Hn5m&OEp+SW(Ltenj*mNk%{|C#s3$UY^H@d* zl$iUl%ND%>6&00!r(+ZWDpyHyqsFbwzUW;`Ze*BIjnb+uLN$^hPKjO#dQyP+29=-H zG=A~usB|thB{?>A!?7n{I4YnGWR)}YUj)o?xo{MugVd9fhJ7pjj+!U&iUt7HS_eg& ze>wQ>oG8@rU)_T*hz1Nk0poF!C}df#j=C4Q8#x4$*4?g>YR?A%0JY?bJ{E-xc&+K^ zD#$vXX*p#8z`gbl_QNy{gWeog_IO$Sng0-6mQP;n-lwH4!ds{X^zbsuZjy(tTYfyWttXWh|Z@_4vLS zZT~?E=-fSdKiL1`P$$(wE&Qh7dbN;agd-atqN4bnz&;h_h> zlqOa#`i}J5y(CvrKU1#Vuu<9Fn%%j}EmqYW*uitYfs(1^Q|MwONd*8W)57yiT5@+< zPFZh)@#79!2eQszyxuCN$&D?8+gRjhG{=ZP@N{9gQb42~2i6BQP$1h_;x1`!nj3o$ zcTUL{qi3N5rlx+lP*Nq6JH<$ME-z_8p7Qfy7qzP>sz**%2XZuU3hkR z8{{*%JaH`!@nEc+SI{Tg2x6@cG0W@pBUhiTxcj!4yi=D7c9d;Kw} z4eQRrm?{YQ*BrHHTz3lS*H`UGuLrToq$K=DfhY|EO2L9HfUSG8zTyI^A{Jm~CLO5k z(Xh(nK|FQ#k2}@Sf9j33rU)u)w@Yir0;fh$(T2Nyk>ru*D!5kNMO^t3rrfjWU8=ji z6jEs%3QmQ8>9fC&nS=`*a*98%S`l@EQKd^Q_dX8Hx=Eg;Yv$nvoH$*`XR=CQ?0$8& zXv5YR$F+!C!uAQ=IjP=aPy5+k+e90MAQinGmZFh2JL^g-&GcZh%M^d8&0fHeRI`^i z*D}W*Q_K@IV-#S5WOu&CO>B4`xe$u&WvEJT?}Wd~hO-3NGs0YJ<;Ryvf}*BfNq76T zr^e8|27^4FDC0CgjX}11o&ETARD#MvO)I;SGLBDQ?z&W!P@n54-}%N=TJLNbiwko& z!~&*_@`E0jE=wJXP)(6~H^6!)N5VUwUs}!uG0jEn{0hKTFyLb=-tq1;L7)6E^9Vlk zxom%|x#mR`1l`GDcI-UiV2SqOx4ceE>ZI|?t4m)RR1q2U%FT4={9Rx2yB?ZEv5V(G zu`j)Ecxy)QLN=r2g%U-r!*4_Vd@~fL<m1LN#&b z<0nJ&qX@M+Yre)eHB~9;b`@C0+z_Uhqr^*a39vmpgvd$vL&@#Wb!r>X&o@b-vfOUlS-3s6J6ESlJpqp82($m|v+LLiUePTG$CGxd<#eRgz z$rv&cCyza4HoZa0A9wI+-1UFbzy2a*j8cm*&ox5@vPY-+)@_=99W-1{+w{RRyLeBP zO+S#Rn$|7C+-L9Ab_wMaoNVweuM|o$7(~2S>r>^4(dSnNxa!O6cFvu8GuQBl&5jbw z&&=Gn?RAuIP76OagiB~!EcSszSC%%p8>^LBRA-d!CTcaVvjzn zhP;h3n8MMeJ(S*n9Bj(icOd_@7rBA;OrV!mF;M%8E1hd5G0M-0ZW9q%CKCG4)$1?W z!$iaKtb8N(cPpvK2;P?>Y3_CbV~`eLN`>f3ik*gE0sT;hJrtx48>IR24P5*3w%yVW zZJsuY-x$@E-&fXi5|?@}sjF6Ta{`Bmee2CS@KfI7UUF? zqRB%Nes47nZqO7a(M|g<&T1(Z{B~54n{&gvR8z)sKmP3^@LV&9N}lS^N+z^e#)hfu zk5vSO5kx6}8ejc2awY(<>?Cv6xpZK2F`*IQE1=Zg*rI=(L*xZHqxE1jsGpSI^sqq3 zcy0q}i=!M5#RCuGZqjxSp(d0`RkKfEE5T&dS+*A2)@;30yIe>2yu;t z8P(WZl!X040T%QV(A1TE+&Ar$To6;Nx8Rvh&%E6IinXrSPboK1mtPwFS*z#=$2uzH=Cg%HS%y?>It<#8Dvp>EbmD7VommX{YDPpohhR?eN|)LI ziKHd0^l=Fb7NT8dcC(N3aH_Zigf_KpQpv~bFYJG48?cG4P22FcP>c z7UoRJhR$Ej{Ks?+P>?e6c4Xq4v_q8>%He8HH2iRTBwWl!cBe)>SZAz1VzI%sG zuLgX@GE%+NJQoUnI=Qe@`~>$CwPQxF2$-I%X2wHjjzVi>$Llmx|H+|@E|8MQIM-3#G$_-2e~~ zyh0y@?4_yRx$@*5qDl&@Vyr=JccnvRwCuCuRvXS~6NOQ!{zx5IyrYac%`)GfEEdyW&rbo^+dJpenB5qj*~0Kp zg@WRcN$$U$ciTjCOIx>X`rl8&G5K9+L*b?57;s7(NEAq zSMbn-iO5x!p3e-pwHnP77emQaH$HPrETcm#+bjHvFH7!e0huPQ%Yr3;&u<{;IS}*@ z&jrb17-QZ<&mDqKD-pt%b+qTNJ4*sY*!|quIUv51kE<-g%eo~S`Wx)GVxj$O? zVQ?>ZhCHZahbZt7NgiJs5Ky^at+ds!fwR0kJPNuQe+ZwshS#3ayrciLZ@;MTK}$bE zVNH{%$kJFdRK+ikVf)W@6*b9OZPrpS$xnSf-wZ{ma2BDXNKF>}?a7IzrHP zOFA~L?D?2EXV@LquXU5iqlGQNm ztN&qUB`9l@TU8bzy@K))jPJ4^!tSL*0V*O?)OZcGWYA}WGayQt@FV-erO`}}#|h1S zs7KFDL{;M~dMEIu%?fjqUKGRIcu>)I$szk3)+7cU^?b^Dx#+uxCLb4)zRhi*w@#Nm z>T_wMg~mb?!#UEOO)&jvZ)G^mD@^484utO*9O`<>h$dmb=cwTgzl@>Nmu9Jk4Q2sg zqHR2`F(k`l!@Z?jLfI(jS~X)}k4liYjbwtHZ6kYfGS(fN6_0MP9Kl{4(pLx1kFgm9 zBruE_1#n)85kJFk!T6dDe3@!)5iObHxf{ffHH*R{Ck0IzLSoDa27I#YqJ{zncW+*K zjxo>6q#cA8LEu`Gi!s_k{K+LbVt3oD`@k1w3oK zeyC|u=Brd$a#%e2hmsS0L>VbFJU4KpgpfVS`?YXm*V;aQPP_cHhdbDCeyEgTW9TDN zSd>WAQgQMQ92*SWRq0DNy&&Ygg(z2IGq4I*t>b4=$J=6UKYYU2elcJ<%n^zyh%;8k zmPMYK1WT+zWcH>98=M897gOS2Wu}XBavkod$`-07%8dOXp6k=kua*l7t?uP;rI~6| zkSsx7h=jJ`F7SGyF*LoEq_1NRfE>9_)0ofaVq*!^EK@sKrsha^1kWWPa8gnP72} ze#`uolp!y#)l8}(S(vXSaM~G38#|f_hAf`%>tzd-WqTvScnGK)0r*sRQA46zrXAp#!R36#5n8Ya?h3zAR4S1-IT}zAM`_pSMLm9E#8Naf>&RH z+tkCEneBzO1dRH0CqE{??+EZi27~=VaFGk5k4=krY@X#(JV#-^Sx7F%6j)HD0mh0- zXa7LCFBIr+SjE~^ z>6F7ZN?OO!SA1}25xXcPQw}Bj=SN7R4gge2;z?wky}By<Me!Ju6oJQTz(wNr_Rr*h$Nf#=_I~o+>$qsHmN#1s56U!OXkPtbRK#92Lsd&NnQ z&V6}`B!@_>2(ImTkrswR?EvUFSE&UF^4}7rqxj^WKF=S?rvWKc)h7K^$MXv=JrFuv zAfcI7F0x}ehwTVv$2stE(dl4kz=CbeDuYxcDKtiEqLhj-Id^_N#3ngp&*>Bk)<>5; zyNZPiT51{UY+w0Gk$iAjyc}hBcUhv4+PJymlW#CaMdgKEFb97+GPGl(V^;(W3*Zf_ zie6TfPW?@=?r(tG%An2m?x!_`6bW)x&^>qlltQu!ff+6GB#p7?>6fFQ=x~F0McbL|GXa@{L>OH6R*%GvH7|z$cmp_7s?&^b1Rem*m3{eP!P4*qJC%gKI ztRY0>ZIEmsvoh^t3L_SSjl^&Et7`vanWuk9tv4=}#LfC4o;GAD1rvx&l@3J+K9qd0 z2QaI9JXGJyEzogJJbqOD%4AB)0T$Bpf^~giVa6XMSZJOZfL$R`sh5>7Hc$k*!w&U= zRCKxeIUuBmD33t2_Y&CKMMwp$N%n<*vnJm{76c9%kh&KmL?5d&-qHi^sCfiuL>9uL zF|W9*b0j_-tAWsw8@0L3mZ|gqbP+by{c8BUOEVT;mq)lNaC`u zusBRE9a{nwoGC(>>AX+Q>KWob#WVXw-+DVf()BwEA^F%*BHG`mB=KQDZmVe$yu2Dzl?YGtSoFzZM;$2#vJJ2ag zzhCMvKKTh3qDa%_(<`p5r?U(-3;1XT8QYc}gJ(Jm+ofn521J{Xu3zTThF_J$AY+er zww6~~Y>Ca=a+h`lSuJ0V(S2>`cK7i%*2iKvZ}_!~6Tf?0mX*tZf-f_H$jMW{7Q5f3 zJR^m;k@#%%IqCwFLH!77&CZM{{2x=W?@@eu6rhBpoGN1jft+pO-R;qbK~lJ;H;MxX zcDE~9pM|D95a>dnjUR;urmHVV)@3Yl3Am}AOVE72Vd*L{q1+k9Xodao$UcNeZl%Mo zmudnQxC-&bLy-7zIi2Q6z)y?)reS5d0- zN@R+wromOZxCDVV2|u)5!HxdGBaF$;cQj;k!6;yIxv0q77T^9<%f2=GGIZRoJ`QtQgAo*8wc#MGW1#0D?Y21aKKZtv%u7AAqK zhra(YC^fHV2TO!mu1EowxkRe@$)q4o*0pN+MiB|4Y3?DP2!986(MM)%WhUL?mIU-5~LMv&|vDKn!Cq8o=7-uJfGQUqk^eO9>4ETjA3^_cBCQrF?2b}9Y zgnj@^k+Rsc?n4CZiQv_@{M(E|i9DPE$|T@(JXClCuzF=UzQWBykA=D9U% z!cviK98BOL6}HMqPvP}Jc*dnW!o;%EDYYQxGTe|g0)-pF6&6e8m_5mAxi%cKNIR3f z&#qu35cg^puHA%;7BxBYQN>O?wy}nfdBVqX)!=D6Pyj`Ffw{)#H8n zv(hqpl<;Gk_+`Jc6}y_H*?^e#ML!dGatcwnaZ)YecTLi(Id0Qi?R5@ugTZQ3o_m=9 z=b2Na;7k+bY1SQAF~Ur-;YI6I@;T<q8aY-5nIfRNnEcLiG;R21K|eWf{GMDR{Q7T6MsmndJDZ4c1|^VnpcwbBgi zwu8&z8GoQx0>tQjQqaXi@GUM7-upOAysAC*#R=3cD)+qGT4ks#su3~yXM2=2PJg(2 zRp+m>d_o|=2kigW&RP96;Q(5kkY+T9)M%sxMkB2-T6%~`=a_`_CM_*U2{=GN${{T< zV)W>e#*q>N@}s-2pZj0j&*z@^^KxF!%lV#qjJB@5GfL9j>MbI+E;&Eas!vd?xHdQk z!R}m3vdjHe_<7sUy!`g{8h`gno}w&(r%|bazw8Z`N$&xr_Yljok(YH0AH+-?*H2TK zK9@$|kWbsFQ)Es{RD{6Rs^g%5nf*O}`yC{vMp}d(zP_l`Lk3FNku{0dBqaOFojV*c zr-Jx~Zl~pF(KZX>I<{gC&DH&qMXLGBIlVC2v{vOd=w~qz$;`97o@Bnq+!&if5oO2`;Pv&?XwcxiadApXi*;xqH{AMPguIGdzhSLfd;>sz8J|fEa{+#>ddJSRR-XL|h&36Ye;> zSAy_tn_!4NLZ;D+NZYB`HfU)rCYh>iG6kOXv}n5^gWT=Z1r?e9nWz@C0u>Vuu6M4M zVmWa<=J}!*Xm>~Vq&^r&w_~)%$-9!cm12Pn!cSYg)wd*!RQ!GTWt7&H7t$B(Koj< zYyk7|dZGvbn4hZxSRPTT375Xi`&}a2u->&JYjK->k-_1_AZ_Q8(L-P6W|fN4kC zP}B8-1-%i}+`Q0G&G3`@AjZ7iO<>H7$iU#GrYOk1tOCg#12y)37nZwoS{<70gtCr|rsp z-(cuYKSL&bf39HO5MUq6|MXa1h?A4k&WeP}{&FJb&cXAuSwREqcQ0cTK@5kn#oCFM zYJi%O6FCyGOug&cxD&O&cRx?uaFJUNNFb93KX`Ty({e+J<qODA;^Bo|seG0dm$G#9lSDhP{?aE&BC}+2^h*15kgZnnqDS)bA zi5hffXk5E9ZBAS}#8BHT7JQJ>D=|JA=w+>vI~1;>HHAt&kKE8Fbp*f+7Xm^{tS<>Z zIhU~NPNIFWY74Vj*bchnULbNVx-_8$YGi*!I$s^p>_dSyk{d`X&Ft0*@?t+rjN!4K z2ySewU_1h((EHersHYLLKWCkmChoLrLK#tiyn9X_1389&3TMO*8Zx+bW9U?m`?(!U zo}q~iF1+AlyI9#jQJxj5Xw9!aOI;PO|7U#elB&?I2w&j(j};{MvUhY0v>Lv^IxeAddT;CQ=b ztDiD`whJ8Ddv!3TdC<@j#{0?A!d*Cc9iAcil>p)$LUm}MT(nNpn`jmrsdqH={enWl zXh2)mC$_hZ%Uhh*MO}dp zx5xWDgQ@)Zn4Xt8a$S}wM6$V9Q{-sbrzHasRrPR;txGmL)H)D!Yt;ojW(XmR?A_{) z!fd{}ojN2ua4cW&{xBaFx0Z(aE@;)c9IyWeAKEjh=E4qgB;W&DpSe}IF23P#u_F9! zBcO#W7ts~8E*fQ{e?kQatFZ?HPsU~uU@BJXyh&VP2XK+)Dt_4R0=&^sr+8+EbO^Ux- zBErz52V`;08RcW)Fg_L!!@~=)E4TGC$~!tz-6i%US)s4`$guSEXJM5@c>}*D^i6e1 za6OJWXx+Ic@!KS{DoLQ5!CR=z1vc*<2i@y>Ld2=~{6ORvg&GBu%Dw@<(~p`*U*Q8C zePi8WlnTSj+#=Rcg@iwkmR1bQuBtc_$t81cyHgSaspcr$FWnh8&Y%$L5m>E6j03Op zE6*>81Yi9P(V2yDVJ`WmjbKln2{-h%KypWJv-;sdyS+*8$IXQn_*xXtMhSD)a9o7i zdrcF%0eE35d*>8HmPgg)d}As{Mp!v!K-9Bd3eQqLX6UN&itYLJ5A^ z{;4A0(z|Dwy!ri}#AQS)$337((;AyG5LZ#}QGG5!_5BOYAIMRnH<4VYR)+OlsX4Og z2cDBZn~%V^8uI-4t~cElKJAxMLvhkGJ{u$mM}9EEdc^;=lsCo3eyz*$6-;Yg0*v}> zPCxHjX~K=Wv}diyt&yV|*H#6f=b0j;#EKAYkXc~I!!{tx^ubzy@shL^AJ@%rcf*xs zJNcxlA|y+zcylC5RBll-+4qDr-HS9n`{GPt<2WkR0txOy6#To^$0KkZNT@~48EC^r z!w!O0O{cZT1+bUt11^4DcUEoERBubz z2so;IIK*ka(qt&(^fr;MJURVBa)-+EVNKVR*v7PXO0P4hos1!T$vf<_6_InWfHH)p z#+w(mX@Vg8js^&a_R!M%K!)Den{JLyO+ycxv&C0dDvyMv`bPJ9RYEBWr>WkNfLBfr zMkIQ}J^&>@73D~lQzK7N)$k@$q75YZBWTe=V;I86ix1zG}-?@yA&7*F;&>1IGXvwo8R*NZzoOG#dWp%kn#~Yb&uE})UEZIW++Ujpi`49 zx1!1QZba~%_(UjZ-=!n$jEwK;;Dc82lcGTjc1-Q0;&oQK@)!aTEk9d;nJ*<=jdGVF zkGeNHdmR$qFq`!ym4(VsWWAJ0>Lgj8#n!FqTe(JM1%On5Ww(^J=Ysx^Whv?y7TW8M zqL2oJrdH|r6WU;*C9+%VPoz8E<=O61f?V@;gn7JoX9N`d)GN6?b99MH{(9NQRYlt3 zy@Njm|9x~HaSmiUn4RbXO{f^_xOa#~kNU1%u;ZW&OOF9B-^WuF<(&wug`ey93isG- z3Kix7n$Pzo&YEBkDkuj5W9go5Z80l9WKkC98b8{HpWxw9$l)2**YR==Fb9D1LS6)j zC5uLX_iV_J{{aoSsTjBmBqAziAf6oR=pXEaRu?OgXu4o)#pfd5DW(U?oxK3dnV+Ex z5+;3voH=EtKU?=4p~(FI@D2; zzc&bPX2Z}Bd|O`K?RJTKX$$Kk>?*~pd=#Vl>boq0NXX2z!lz4MPO*CBZ3}fmUR5Xn5ecTIh3e?k{DOpA}UsU%FXk zS@dtvK8m$?1AlK^`HxbF#rR{1N9rK%l)*gh9wZUYftiq39e*f)Uv!7__<;-WOqGnd zUsXyn2RHE+Q601yD@r<0Yre-g>sRJWxSp$r=^PVFo)Es`lDxMu@OVH2%wM3($rFrO zj0j56fka`evr~e1F>J}-=$a9-^5AXowhAicZlh+c@U?J_zNr;uE?(EXETzY)KC_tL z&+5PCqA3zoS*mz=uNhltuqKpS1fw7dnCDXX&R1uw*nB_|3(8b5Ib~D70%lx^+H${a z(eNABTH!a6;d33S$`|{Rah6y7*Q|emhR6H9eZBg%ScSg^j6J%!LqOh1DfM4{{=fcj GC-6Uo0bwlw literal 0 HcmV?d00001 diff --git a/tests/test_backend_webp.py b/tests/test_backend_webp.py new file mode 100644 index 0000000..3e34d43 --- /dev/null +++ b/tests/test_backend_webp.py @@ -0,0 +1,82 @@ +import sys +from pathlib import Path +from typing import List + +from docling.datamodel.base_models import InputFormat +from docling.datamodel.document import ConversionResult, DoclingDocument +from docling.datamodel.pipeline_options import ( + EasyOcrOptions, + OcrMacOptions, + OcrOptions, + RapidOcrOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, +) +from docling.document_converter import DocumentConverter, ImageFormatOption +from tests.verify_utils import verify_conversion_result_v2 + +from .test_data_gen_flag import GEN_TEST_DATA + +GENERATE = GEN_TEST_DATA + + +def get_webp_paths(): + # Define the directory you want to search + directory = Path("./tests/data/webp/") + + # List all WEBP files in the directory and its subdirectories + webp_files = sorted(directory.rglob("*.webp")) + return webp_files + + +def get_converter(ocr_options: OcrOptions): + image_format_option = ImageFormatOption() + image_format_option.pipeline_options.ocr_options = ocr_options + + converter = DocumentConverter( + format_options={InputFormat.IMAGE: image_format_option}, + allowed_formats=[InputFormat.IMAGE], + ) + + return converter + + +def test_e2e_webp_conversions(): + webp_paths = get_webp_paths() + + engines: List[OcrOptions] = [ + EasyOcrOptions(), + TesseractOcrOptions(), + TesseractCliOcrOptions(), + EasyOcrOptions(force_full_page_ocr=True), + TesseractOcrOptions(force_full_page_ocr=True), + TesseractOcrOptions(force_full_page_ocr=True, lang=["auto"]), + TesseractCliOcrOptions(force_full_page_ocr=True), + TesseractCliOcrOptions(force_full_page_ocr=True, lang=["auto"]), + ] + + # rapidocr is only available for Python >=3.6,<3.13 + if sys.version_info < (3, 13): + engines.append(RapidOcrOptions()) + engines.append(RapidOcrOptions(force_full_page_ocr=True)) + + # only works on mac + if "darwin" == sys.platform: + engines.append(OcrMacOptions()) + engines.append(OcrMacOptions(force_full_page_ocr=True)) + for ocr_options in engines: + print( + f"Converting with ocr_engine: {ocr_options.kind}, language: {ocr_options.lang}" + ) + converter = get_converter(ocr_options=ocr_options) + for webp_path in webp_paths: + print(f"converting {webp_path}") + + doc_result: ConversionResult = converter.convert(webp_path) + + verify_conversion_result_v2( + input_path=webp_path, + doc_result=doc_result, + generate=GENERATE, + fuzzy=True, + ) diff --git a/tests/verify_utils.py b/tests/verify_utils.py index c94c153..46a46ac 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -462,7 +462,7 @@ def verify_conversion_result_v2( def verify_document(pred_doc: DoclingDocument, gtfile: str, generate: bool = False): if not os.path.exists(gtfile) or generate: with open(gtfile, "w") as fw: - json.dump(pred_doc.export_to_dict(), fw, indent=2) + json.dump(pred_doc.export_to_dict(), fw, ensure_ascii=False, indent=2) return True else: