diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 5791c0e..4c71f5c 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -412,7 +412,11 @@ class _DocumentConversionInput(BaseModel): else: return "application/xml" - if re.match(r".*?\s*)?(\nconsole.log('foo');\n" + '\n' + ) + stream = DocumentStream(name="lorem_ipsum", stream=BytesIO(f"{html_str}".encode())) + assert dci._guess_format(stream) == InputFormat.HTML + # Valid MD buf = BytesIO(Path("./tests/data/md/wiki.md").open("rb").read()) stream = DocumentStream(name="wiki.md", stream=buf)