Archive PV: gzip large files

This commit is contained in:
Emmanuel Viennet 2024-04-15 03:21:32 +02:00
parent 1ddf9b6ab8
commit f2ce16f161
3 changed files with 59 additions and 32 deletions

@ -49,11 +49,13 @@
"""
import datetime
import glob
import gzip
import mimetypes
import os
import re
import shutil
import time
import zlib
import chardet
@ -241,11 +243,13 @@ class BaseArchiver:
filename: str,
data: str | bytes,
dept_id: int = None,
compress=False,
):
"""Store data in archive, under given filename.
Filename may be modified (sanitized): return used filename
The file is created or replaced.
data may be str or bytes
If compress, data is gziped and filename suffix ".gz" added.
"""
if isinstance(data, str):
data = data.encode(scu.SCO_ENCODING)
@ -255,8 +259,14 @@ class BaseArchiver:
try:
scu.GSL.acquire()
fname = os.path.join(archive_id, filename)
with open(fname, "wb") as f:
f.write(data)
if compress:
if not fname.endswith(".gz"):
fname += ".gz"
with gzip.open(fname, "wb") as f:
f.write(data)
else:
with open(fname, "wb") as f:
f.write(data)
except FileNotFoundError as exc:
raise ScoValueError(
f"Erreur stockage archive (dossier inexistant, chemin {fname})"
@ -274,8 +284,17 @@ class BaseArchiver:
fname = os.path.join(archive_id, filename)
log(f"reading archive file {fname}")
try:
with open(fname, "rb") as f:
data = f.read()
if fname.endswith(".gz"):
try:
with gzip.open(fname) as f:
data = f.read()
except (OSError, EOFError, zlib.error) as exc:
raise ScoValueError(
f"Erreur lecture archive ({fname} invalide)"
) from exc
else:
with open(fname, "rb") as f:
data = f.read()
except FileNotFoundError as exc:
raise ScoValueError(
f"Erreur lecture archive (inexistant, chemin {fname})"
@ -288,6 +307,8 @@ class BaseArchiver:
"""
archive_id = self.get_id_from_name(oid, archive_name, dept_id=dept_id)
data = self.get(archive_id, filename)
if filename.endswith(".gz"):
filename = filename[:-3]
mime = mimetypes.guess_type(filename)[0]
if mime is None:
mime = "application/octet-stream"

@ -68,7 +68,7 @@ PV_ARCHIVER = SemsArchiver()
def do_formsemestre_archive(
formsemestre_id,
formsemestre: FormSemestre,
group_ids: list[int] = None, # si indiqué, ne prend que ces groupes
description="",
date_jury="",
@ -92,9 +92,8 @@ def do_formsemestre_archive(
raise ScoValueError(
"do_formsemestre_archive: version de bulletin demandée invalide"
)
formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
res: NotesTableCompat = res_sem.load_formsemestre_results(formsemestre)
sem_archive_id = formsemestre_id
sem_archive_id = formsemestre.id
archive_id = PV_ARCHIVER.create_obj_archive(
sem_archive_id, description, formsemestre.dept_id
)
@ -102,9 +101,9 @@ def do_formsemestre_archive(
if not group_ids:
# tous les inscrits du semestre
group_ids = [sco_groups.get_default_group(formsemestre_id)]
group_ids = [sco_groups.get_default_group(formsemestre.id)]
groups_infos = sco_groups_view.DisplayedGroupsInfos(
group_ids, formsemestre_id=formsemestre_id
group_ids, formsemestre_id=formsemestre.id
)
groups_filename = "-" + groups_infos.groups_filename
etudids = [m["etudid"] for m in groups_infos.members]
@ -142,19 +141,23 @@ def do_formsemestre_archive(
)
# Bulletins en JSON
data = gen_formsemestre_recapcomplet_json(formsemestre_id, xml_with_decisions=True)
data = gen_formsemestre_recapcomplet_json(formsemestre.id, xml_with_decisions=True)
data_js = json.dumps(data, indent=1, cls=ScoDocJSONEncoder)
if data:
PV_ARCHIVER.store(
archive_id, "Bulletins.json", data_js, dept_id=formsemestre.dept_id
archive_id,
"Bulletins.json",
data_js,
dept_id=formsemestre.dept_id,
compress=True,
)
# Décisions de jury, en XLS
if formsemestre.formation.is_apc():
response = jury_but_pv.pvjury_page_but(formsemestre_id, fmt="xls")
response = jury_but_pv.pvjury_page_but(formsemestre.id, fmt="xls")
data = response.get_data()
else: # formations classiques
data = sco_pv_forms.formsemestre_pvjury(
formsemestre_id, fmt="xls", publish=False
formsemestre.id, fmt="xls", publish=False
)
if data:
PV_ARCHIVER.store(
@ -165,7 +168,7 @@ def do_formsemestre_archive(
)
# Classeur bulletins (PDF)
data, _ = sco_bulletins_pdf.get_formsemestre_bulletins_pdf(
formsemestre_id, version=bul_version
formsemestre.id, version=bul_version
)
if data:
PV_ARCHIVER.store(
@ -173,10 +176,11 @@ def do_formsemestre_archive(
"Bulletins.pdf",
data,
dept_id=formsemestre.dept_id,
compress=True,
)
# Lettres individuelles (PDF):
data = sco_pv_lettres_inviduelles.pdf_lettres_individuelles(
formsemestre_id,
formsemestre.id,
etudids=etudids,
date_jury=date_jury,
date_commission=date_commission,
@ -217,7 +221,7 @@ def formsemestre_archive(formsemestre_id, group_ids: list[int] = None):
"""Make and store new archive for this formsemestre.
(all students or only selected groups)
"""
formsemestre: FormSemestre = FormSemestre.query.get_or_404(formsemestre_id)
formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
if not formsemestre.can_edit_pv():
raise ScoPermissionDenied(
dest_url=url_for(
@ -320,7 +324,7 @@ enregistrés et non modifiables, on peut les retrouver ultérieurement.
else:
tf[2]["anonymous"] = False
do_formsemestre_archive(
formsemestre_id,
formsemestre,
group_ids=group_ids,
description=tf[2]["description"],
date_jury=tf[2]["date_jury"],
@ -352,7 +356,7 @@ def formsemestre_list_archives(formsemestre_id):
"""Page listing archives"""
formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
sem_archive_id = formsemestre_id
L = []
archives_descr = []
for archive_id in PV_ARCHIVER.list_obj_archives(
sem_archive_id, dept_id=formsemestre.dept_id
):
@ -366,28 +370,30 @@ def formsemestre_list_archives(formsemestre_id):
archive_id, dept_id=formsemestre.dept_id
),
}
L.append(a)
archives_descr.append(a)
H = [html_sco_header.html_sem_header("Archive des PV et résultats ")]
if not L:
if not archives_descr:
H.append("<p>aucune archive enregistrée</p>")
else:
H.append("<ul>")
for a in L:
for a in archives_descr:
archive_name = PV_ARCHIVER.get_archive_name(a["archive_id"])
H.append(
'<li>%s : <em>%s</em> (<a href="formsemestre_delete_archive?formsemestre_id=%s&archive_name=%s">supprimer</a>)<ul>'
% (
a["date"].strftime("%d/%m/%Y %H:%M"),
a["description"],
formsemestre_id,
archive_name,
)
f"""<li>{a["date"].strftime("%d/%m/%Y %H:%M")} : <em>{a["description"]}</em>
(<a href="{ url_for( "notes.formsemestre_delete_archive", scodoc_dept=g.scodoc_dept,
formsemestre_id=formsemestre_id, archive_name=archive_name
)}">supprimer</a>)
<ul>"""
)
for filename in a["content"]:
H.append(
'<li><a href="formsemestre_get_archived_file?formsemestre_id=%s&archive_name=%s&filename=%s">%s</a></li>'
% (formsemestre_id, archive_name, filename, filename)
f"""<li><a href="{
url_for( "notes.formsemestre_get_archived_file", scodoc_dept=g.scodoc_dept,
formsemestre_id=formsemestre_id,
archive_name=archive_name,
filename=filename
)}">{filename[:-3] if filename.endswith(".gz") else filename}</a></li>"""
)
if not a["content"]:
H.append("<li><em>aucun fichier !</em></li>")
@ -399,7 +405,7 @@ def formsemestre_list_archives(formsemestre_id):
def formsemestre_get_archived_file(formsemestre_id, archive_name, filename):
"""Send file to client."""
formsemestre: FormSemestre = FormSemestre.query.get_or_404(formsemestre_id)
formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
sem_archive_id = formsemestre.id
return PV_ARCHIVER.get_archived_file(
sem_archive_id, archive_name, filename, dept_id=formsemestre.dept_id

@ -1,7 +1,7 @@
# -*- mode: python -*-
# -*- coding: utf-8 -*-
SCOVERSION = "9.6.961"
SCOVERSION = "9.6.962"
SCONAME = "ScoDoc"