fix: Let BeautifulSoup detect the HTML encoding (#695)
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
2d24faecd9
commit
42856fdf79
@ -37,10 +37,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
text_stream = self.path_or_stream.getvalue().decode("utf-8")
|
text_stream = self.path_or_stream.getvalue()
|
||||||
self.soup = BeautifulSoup(text_stream, "html.parser")
|
self.soup = BeautifulSoup(text_stream, "html.parser")
|
||||||
if isinstance(self.path_or_stream, Path):
|
if isinstance(self.path_or_stream, Path):
|
||||||
with open(self.path_or_stream, "r", encoding="utf-8") as f:
|
with open(self.path_or_stream, "rb") as f:
|
||||||
html_content = f.read()
|
html_content = f.read()
|
||||||
self.soup = BeautifulSoup(html_content, "html.parser")
|
self.soup = BeautifulSoup(html_content, "html.parser")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
Loading…
Reference in New Issue
Block a user