Archive PV: gzip large files

This commit is contained in:
Emmanuel Viennet 2024-04-15 03:21:32 +02:00
parent 1ddf9b6ab8
commit f2ce16f161
3 changed files with 59 additions and 32 deletions

View File

@ -49,11 +49,13 @@
""" """
import datetime import datetime
import glob import glob
import gzip
import mimetypes import mimetypes
import os import os
import re import re
import shutil import shutil
import time import time
import zlib
import chardet import chardet
@ -241,11 +243,13 @@ class BaseArchiver:
filename: str, filename: str,
data: str | bytes, data: str | bytes,
dept_id: int = None, dept_id: int = None,
compress=False,
): ):
"""Store data in archive, under given filename. """Store data in archive, under given filename.
Filename may be modified (sanitized): return used filename Filename may be modified (sanitized): return used filename
The file is created or replaced. The file is created or replaced.
data may be str or bytes data may be str or bytes
If compress, data is gzipped and the filename suffix ".gz" is added.
""" """
if isinstance(data, str): if isinstance(data, str):
data = data.encode(scu.SCO_ENCODING) data = data.encode(scu.SCO_ENCODING)
@ -255,6 +259,12 @@ class BaseArchiver:
try: try:
scu.GSL.acquire() scu.GSL.acquire()
fname = os.path.join(archive_id, filename) fname = os.path.join(archive_id, filename)
if compress:
if not fname.endswith(".gz"):
fname += ".gz"
with gzip.open(fname, "wb") as f:
f.write(data)
else:
with open(fname, "wb") as f: with open(fname, "wb") as f:
f.write(data) f.write(data)
except FileNotFoundError as exc: except FileNotFoundError as exc:
@ -274,6 +284,15 @@ class BaseArchiver:
fname = os.path.join(archive_id, filename) fname = os.path.join(archive_id, filename)
log(f"reading archive file {fname}") log(f"reading archive file {fname}")
try: try:
if fname.endswith(".gz"):
try:
with gzip.open(fname) as f:
data = f.read()
except (OSError, EOFError, zlib.error) as exc:
raise ScoValueError(
f"Erreur lecture archive ({fname} invalide)"
) from exc
else:
with open(fname, "rb") as f: with open(fname, "rb") as f:
data = f.read() data = f.read()
except FileNotFoundError as exc: except FileNotFoundError as exc:
@ -288,6 +307,8 @@ class BaseArchiver:
""" """
archive_id = self.get_id_from_name(oid, archive_name, dept_id=dept_id) archive_id = self.get_id_from_name(oid, archive_name, dept_id=dept_id)
data = self.get(archive_id, filename) data = self.get(archive_id, filename)
if filename.endswith(".gz"):
filename = filename[:-3]
mime = mimetypes.guess_type(filename)[0] mime = mimetypes.guess_type(filename)[0]
if mime is None: if mime is None:
mime = "application/octet-stream" mime = "application/octet-stream"

View File

@ -68,7 +68,7 @@ PV_ARCHIVER = SemsArchiver()
def do_formsemestre_archive( def do_formsemestre_archive(
formsemestre_id, formsemestre: FormSemestre,
group_ids: list[int] = None, # si indiqué, ne prend que ces groupes group_ids: list[int] = None, # si indiqué, ne prend que ces groupes
description="", description="",
date_jury="", date_jury="",
@ -92,9 +92,8 @@ def do_formsemestre_archive(
raise ScoValueError( raise ScoValueError(
"do_formsemestre_archive: version de bulletin demandée invalide" "do_formsemestre_archive: version de bulletin demandée invalide"
) )
formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
res: NotesTableCompat = res_sem.load_formsemestre_results(formsemestre) res: NotesTableCompat = res_sem.load_formsemestre_results(formsemestre)
sem_archive_id = formsemestre_id sem_archive_id = formsemestre.id
archive_id = PV_ARCHIVER.create_obj_archive( archive_id = PV_ARCHIVER.create_obj_archive(
sem_archive_id, description, formsemestre.dept_id sem_archive_id, description, formsemestre.dept_id
) )
@ -102,9 +101,9 @@ def do_formsemestre_archive(
if not group_ids: if not group_ids:
# tous les inscrits du semestre # tous les inscrits du semestre
group_ids = [sco_groups.get_default_group(formsemestre_id)] group_ids = [sco_groups.get_default_group(formsemestre.id)]
groups_infos = sco_groups_view.DisplayedGroupsInfos( groups_infos = sco_groups_view.DisplayedGroupsInfos(
group_ids, formsemestre_id=formsemestre_id group_ids, formsemestre_id=formsemestre.id
) )
groups_filename = "-" + groups_infos.groups_filename groups_filename = "-" + groups_infos.groups_filename
etudids = [m["etudid"] for m in groups_infos.members] etudids = [m["etudid"] for m in groups_infos.members]
@ -142,19 +141,23 @@ def do_formsemestre_archive(
) )
# Bulletins en JSON # Bulletins en JSON
data = gen_formsemestre_recapcomplet_json(formsemestre_id, xml_with_decisions=True) data = gen_formsemestre_recapcomplet_json(formsemestre.id, xml_with_decisions=True)
data_js = json.dumps(data, indent=1, cls=ScoDocJSONEncoder) data_js = json.dumps(data, indent=1, cls=ScoDocJSONEncoder)
if data: if data:
PV_ARCHIVER.store( PV_ARCHIVER.store(
archive_id, "Bulletins.json", data_js, dept_id=formsemestre.dept_id archive_id,
"Bulletins.json",
data_js,
dept_id=formsemestre.dept_id,
compress=True,
) )
# Décisions de jury, en XLS # Décisions de jury, en XLS
if formsemestre.formation.is_apc(): if formsemestre.formation.is_apc():
response = jury_but_pv.pvjury_page_but(formsemestre_id, fmt="xls") response = jury_but_pv.pvjury_page_but(formsemestre.id, fmt="xls")
data = response.get_data() data = response.get_data()
else: # formations classiques else: # formations classiques
data = sco_pv_forms.formsemestre_pvjury( data = sco_pv_forms.formsemestre_pvjury(
formsemestre_id, fmt="xls", publish=False formsemestre.id, fmt="xls", publish=False
) )
if data: if data:
PV_ARCHIVER.store( PV_ARCHIVER.store(
@ -165,7 +168,7 @@ def do_formsemestre_archive(
) )
# Classeur bulletins (PDF) # Classeur bulletins (PDF)
data, _ = sco_bulletins_pdf.get_formsemestre_bulletins_pdf( data, _ = sco_bulletins_pdf.get_formsemestre_bulletins_pdf(
formsemestre_id, version=bul_version formsemestre.id, version=bul_version
) )
if data: if data:
PV_ARCHIVER.store( PV_ARCHIVER.store(
@ -173,10 +176,11 @@ def do_formsemestre_archive(
"Bulletins.pdf", "Bulletins.pdf",
data, data,
dept_id=formsemestre.dept_id, dept_id=formsemestre.dept_id,
compress=True,
) )
# Lettres individuelles (PDF): # Lettres individuelles (PDF):
data = sco_pv_lettres_inviduelles.pdf_lettres_individuelles( data = sco_pv_lettres_inviduelles.pdf_lettres_individuelles(
formsemestre_id, formsemestre.id,
etudids=etudids, etudids=etudids,
date_jury=date_jury, date_jury=date_jury,
date_commission=date_commission, date_commission=date_commission,
@ -217,7 +221,7 @@ def formsemestre_archive(formsemestre_id, group_ids: list[int] = None):
"""Make and store new archive for this formsemestre. """Make and store new archive for this formsemestre.
(all students or only selected groups) (all students or only selected groups)
""" """
formsemestre: FormSemestre = FormSemestre.query.get_or_404(formsemestre_id) formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
if not formsemestre.can_edit_pv(): if not formsemestre.can_edit_pv():
raise ScoPermissionDenied( raise ScoPermissionDenied(
dest_url=url_for( dest_url=url_for(
@ -320,7 +324,7 @@ enregistrés et non modifiables, on peut les retrouver ultérieurement.
else: else:
tf[2]["anonymous"] = False tf[2]["anonymous"] = False
do_formsemestre_archive( do_formsemestre_archive(
formsemestre_id, formsemestre,
group_ids=group_ids, group_ids=group_ids,
description=tf[2]["description"], description=tf[2]["description"],
date_jury=tf[2]["date_jury"], date_jury=tf[2]["date_jury"],
@ -352,7 +356,7 @@ def formsemestre_list_archives(formsemestre_id):
"""Page listing archives""" """Page listing archives"""
formsemestre = FormSemestre.get_formsemestre(formsemestre_id) formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
sem_archive_id = formsemestre_id sem_archive_id = formsemestre_id
L = [] archives_descr = []
for archive_id in PV_ARCHIVER.list_obj_archives( for archive_id in PV_ARCHIVER.list_obj_archives(
sem_archive_id, dept_id=formsemestre.dept_id sem_archive_id, dept_id=formsemestre.dept_id
): ):
@ -366,28 +370,30 @@ def formsemestre_list_archives(formsemestre_id):
archive_id, dept_id=formsemestre.dept_id archive_id, dept_id=formsemestre.dept_id
), ),
} }
L.append(a) archives_descr.append(a)
H = [html_sco_header.html_sem_header("Archive des PV et résultats ")] H = [html_sco_header.html_sem_header("Archive des PV et résultats ")]
if not L: if not archives_descr:
H.append("<p>aucune archive enregistrée</p>") H.append("<p>aucune archive enregistrée</p>")
else: else:
H.append("<ul>") H.append("<ul>")
for a in L: for a in archives_descr:
archive_name = PV_ARCHIVER.get_archive_name(a["archive_id"]) archive_name = PV_ARCHIVER.get_archive_name(a["archive_id"])
H.append( H.append(
'<li>%s : <em>%s</em> (<a href="formsemestre_delete_archive?formsemestre_id=%s&archive_name=%s">supprimer</a>)<ul>' f"""<li>{a["date"].strftime("%d/%m/%Y %H:%M")} : <em>{a["description"]}</em>
% ( (<a href="{ url_for( "notes.formsemestre_delete_archive", scodoc_dept=g.scodoc_dept,
a["date"].strftime("%d/%m/%Y %H:%M"), formsemestre_id=formsemestre_id, archive_name=archive_name
a["description"], )}">supprimer</a>)
formsemestre_id, <ul>"""
archive_name,
)
) )
for filename in a["content"]: for filename in a["content"]:
H.append( H.append(
'<li><a href="formsemestre_get_archived_file?formsemestre_id=%s&archive_name=%s&filename=%s">%s</a></li>' f"""<li><a href="{
% (formsemestre_id, archive_name, filename, filename) url_for( "notes.formsemestre_get_archived_file", scodoc_dept=g.scodoc_dept,
formsemestre_id=formsemestre_id,
archive_name=archive_name,
filename=filename
)}">{filename[:-3] if filename.endswith(".gz") else filename}</a></li>"""
) )
if not a["content"]: if not a["content"]:
H.append("<li><em>aucun fichier !</em></li>") H.append("<li><em>aucun fichier !</em></li>")
@ -399,7 +405,7 @@ def formsemestre_list_archives(formsemestre_id):
def formsemestre_get_archived_file(formsemestre_id, archive_name, filename): def formsemestre_get_archived_file(formsemestre_id, archive_name, filename):
"""Send file to client.""" """Send file to client."""
formsemestre: FormSemestre = FormSemestre.query.get_or_404(formsemestre_id) formsemestre = FormSemestre.get_formsemestre(formsemestre_id)
sem_archive_id = formsemestre.id sem_archive_id = formsemestre.id
return PV_ARCHIVER.get_archived_file( return PV_ARCHIVER.get_archived_file(
sem_archive_id, archive_name, filename, dept_id=formsemestre.dept_id sem_archive_id, archive_name, filename, dept_id=formsemestre.dept_id

View File

@ -1,7 +1,7 @@
# -*- mode: python -*- # -*- mode: python -*-
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
SCOVERSION = "9.6.961" SCOVERSION = "9.6.962"
SCONAME = "ScoDoc" SCONAME = "ScoDoc"