From 3eda56e89c50ed57cba81d31264a738fe0536a37 Mon Sep 17 00:00:00 2001
From: Emmanuel Viennet
Date: Sun, 12 Dec 2021 16:53:52 +0100
Subject: [PATCH] Fix: read non-utf8 archives index

---
 app/scodoc/sco_archives.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/app/scodoc/sco_archives.py b/app/scodoc/sco_archives.py
index f747d9127..5884254e1 100644
--- a/app/scodoc/sco_archives.py
+++ b/app/scodoc/sco_archives.py
@@ -47,6 +47,7 @@
 qui est une description (humaine, format libre) de l'archive.
 
 """
+import chardet
 import datetime
 import glob
 import mimetypes
@@ -203,8 +204,20 @@ class BaseArchiver(object):
     def get_archive_description(self, archive_id):
         """Return description of archive"""
         self.initialize()
-        with open(os.path.join(archive_id, "_description.txt")) as f:
-            descr = f.read()
+        filename = os.path.join(archive_id, "_description.txt")
+        try:
+            with open(filename) as f:
+                descr = f.read()
+        except UnicodeDecodeError:
+            # Some (old) files may have been saved under exotic encodings:
+            # re-read the raw bytes and let chardet guess the encoding.
+            with open(filename, "rb") as f:
+                data = f.read()
+            # chardet reports encoding None when it cannot guess; fall back
+            # to latin-1, which can decode any byte sequence.
+            encoding = chardet.detect(data)["encoding"] or "latin-1"
+            descr = data.decode(encoding, errors="replace")
+
         return descr
 
     def create_obj_archive(self, oid: int, description: str):