From f2ce16f161aa1153e9d87628690b71840747a471 Mon Sep 17 00:00:00 2001 From: Emmanuel Viennet Date: Mon, 15 Apr 2024 03:21:32 +0200 Subject: [PATCH] Archive PV: gzip large files --- app/scodoc/sco_archives.py | 29 ++++++++++-- app/scodoc/sco_archives_formsemestre.py | 60 ++++++++++++++----------- sco_version.py | 2 +- 3 files changed, 59 insertions(+), 32 deletions(-) diff --git a/app/scodoc/sco_archives.py b/app/scodoc/sco_archives.py index 03747f0c..56d501ab 100644 --- a/app/scodoc/sco_archives.py +++ b/app/scodoc/sco_archives.py @@ -49,11 +49,13 @@ """ import datetime import glob +import gzip import mimetypes import os import re import shutil import time +import zlib import chardet @@ -241,11 +243,13 @@ class BaseArchiver: filename: str, data: str | bytes, dept_id: int = None, + compress=False, ): """Store data in archive, under given filename. Filename may be modified (sanitized): return used filename The file is created or replaced. data may be str or bytes + If compress, data is gzipped and filename suffix ".gz" added. 
""" if isinstance(data, str): data = data.encode(scu.SCO_ENCODING) @@ -255,8 +259,14 @@ class BaseArchiver: try: scu.GSL.acquire() fname = os.path.join(archive_id, filename) - with open(fname, "wb") as f: - f.write(data) + if compress: + if not fname.endswith(".gz"): + fname += ".gz" + with gzip.open(fname, "wb") as f: + f.write(data) + else: + with open(fname, "wb") as f: + f.write(data) except FileNotFoundError as exc: raise ScoValueError( f"Erreur stockage archive (dossier inexistant, chemin {fname})" @@ -274,8 +284,17 @@ class BaseArchiver: fname = os.path.join(archive_id, filename) log(f"reading archive file {fname}") try: - with open(fname, "rb") as f: - data = f.read() + if fname.endswith(".gz"): + try: + with gzip.open(fname) as f: + data = f.read() + except (OSError, EOFError, zlib.error) as exc: + raise ScoValueError( + f"Erreur lecture archive ({fname} invalide)" + ) from exc + else: + with open(fname, "rb") as f: + data = f.read() except FileNotFoundError as exc: raise ScoValueError( f"Erreur lecture archive (inexistant, chemin {fname})" @@ -288,6 +307,8 @@ class BaseArchiver: """ archive_id = self.get_id_from_name(oid, archive_name, dept_id=dept_id) data = self.get(archive_id, filename) + if filename.endswith(".gz"): + filename = filename[:-3] mime = mimetypes.guess_type(filename)[0] if mime is None: mime = "application/octet-stream" diff --git a/app/scodoc/sco_archives_formsemestre.py b/app/scodoc/sco_archives_formsemestre.py index 6a77e650..f3eb098d 100644 --- a/app/scodoc/sco_archives_formsemestre.py +++ b/app/scodoc/sco_archives_formsemestre.py @@ -68,7 +68,7 @@ PV_ARCHIVER = SemsArchiver() def do_formsemestre_archive( - formsemestre_id, + formsemestre: FormSemestre, group_ids: list[int] = None, # si indiqué, ne prend que ces groupes description="", date_jury="", @@ -92,9 +92,8 @@ def do_formsemestre_archive( raise ScoValueError( "do_formsemestre_archive: version de bulletin demandée invalide" ) - formsemestre = 
FormSemestre.get_formsemestre(formsemestre_id) res: NotesTableCompat = res_sem.load_formsemestre_results(formsemestre) - sem_archive_id = formsemestre_id + sem_archive_id = formsemestre.id archive_id = PV_ARCHIVER.create_obj_archive( sem_archive_id, description, formsemestre.dept_id ) @@ -102,9 +101,9 @@ def do_formsemestre_archive( if not group_ids: # tous les inscrits du semestre - group_ids = [sco_groups.get_default_group(formsemestre_id)] + group_ids = [sco_groups.get_default_group(formsemestre.id)] groups_infos = sco_groups_view.DisplayedGroupsInfos( - group_ids, formsemestre_id=formsemestre_id + group_ids, formsemestre_id=formsemestre.id ) groups_filename = "-" + groups_infos.groups_filename etudids = [m["etudid"] for m in groups_infos.members] @@ -142,19 +141,23 @@ def do_formsemestre_archive( ) # Bulletins en JSON - data = gen_formsemestre_recapcomplet_json(formsemestre_id, xml_with_decisions=True) + data = gen_formsemestre_recapcomplet_json(formsemestre.id, xml_with_decisions=True) data_js = json.dumps(data, indent=1, cls=ScoDocJSONEncoder) if data: PV_ARCHIVER.store( - archive_id, "Bulletins.json", data_js, dept_id=formsemestre.dept_id + archive_id, + "Bulletins.json", + data_js, + dept_id=formsemestre.dept_id, + compress=True, ) # Décisions de jury, en XLS if formsemestre.formation.is_apc(): - response = jury_but_pv.pvjury_page_but(formsemestre_id, fmt="xls") + response = jury_but_pv.pvjury_page_but(formsemestre.id, fmt="xls") data = response.get_data() else: # formations classiques data = sco_pv_forms.formsemestre_pvjury( - formsemestre_id, fmt="xls", publish=False + formsemestre.id, fmt="xls", publish=False ) if data: PV_ARCHIVER.store( @@ -165,7 +168,7 @@ def do_formsemestre_archive( ) # Classeur bulletins (PDF) data, _ = sco_bulletins_pdf.get_formsemestre_bulletins_pdf( - formsemestre_id, version=bul_version + formsemestre.id, version=bul_version ) if data: PV_ARCHIVER.store( @@ -173,10 +176,11 @@ def do_formsemestre_archive( "Bulletins.pdf", data, 
dept_id=formsemestre.dept_id, + compress=True, ) # Lettres individuelles (PDF): data = sco_pv_lettres_inviduelles.pdf_lettres_individuelles( - formsemestre_id, + formsemestre.id, etudids=etudids, date_jury=date_jury, date_commission=date_commission, @@ -217,7 +221,7 @@ def formsemestre_archive(formsemestre_id, group_ids: list[int] = None): """Make and store new archive for this formsemestre. (all students or only selected groups) """ - formsemestre: FormSemestre = FormSemestre.query.get_or_404(formsemestre_id) + formsemestre = FormSemestre.get_formsemestre(formsemestre_id) if not formsemestre.can_edit_pv(): raise ScoPermissionDenied( dest_url=url_for( @@ -320,7 +324,7 @@ enregistrés et non modifiables, on peut les retrouver ultérieurement. else: tf[2]["anonymous"] = False do_formsemestre_archive( - formsemestre_id, + formsemestre, group_ids=group_ids, description=tf[2]["description"], date_jury=tf[2]["date_jury"], @@ -352,7 +356,7 @@ def formsemestre_list_archives(formsemestre_id): """Page listing archives""" formsemestre = FormSemestre.get_formsemestre(formsemestre_id) sem_archive_id = formsemestre_id - L = [] + archives_descr = [] for archive_id in PV_ARCHIVER.list_obj_archives( sem_archive_id, dept_id=formsemestre.dept_id ): @@ -366,28 +370,30 @@ def formsemestre_list_archives(formsemestre_id): archive_id, dept_id=formsemestre.dept_id ), } - L.append(a) + archives_descr.append(a) H = [html_sco_header.html_sem_header("Archive des PV et résultats ")] - if not L: + if not archives_descr: H.append("

aucune archive enregistrée

") else: H.append("