From 66dbec86bf065650ced12293192c54d7d4505618 Mon Sep 17 00:00:00 2001 From: Emmanuel Viennet Date: Wed, 20 Oct 2021 16:47:41 +0200 Subject: [PATCH] Add cli: photos-import-files --- app/scodoc/htmlutils.py | 65 ++++++++++++- app/scodoc/sco_archives_etud.py | 45 ++++++--- app/scodoc/sco_trombino.py | 95 ++++++++----------- app/templates/scolar/photos_import_files.html | 39 ++++++++ app/templates/scolar/photos_import_files.txt | 23 +++++ scodoc.py | 45 +++++++++ 6 files changed, 243 insertions(+), 69 deletions(-) create mode 100644 app/templates/scolar/photos_import_files.html create mode 100755 app/templates/scolar/photos_import_files.txt diff --git a/app/scodoc/htmlutils.py b/app/scodoc/htmlutils.py index 68e835c3a5..c06e86e919 100644 --- a/app/scodoc/htmlutils.py +++ b/app/scodoc/htmlutils.py @@ -27,9 +27,12 @@ """Various HTML generation functions """ +from html.parser import HTMLParser +from html.entities import name2codepoint +import re + from flask import g, url_for -import app.scodoc.sco_utils as scu from . import listhistogram @@ -130,3 +133,63 @@ def make_menu(title, items, css_class="", alone=False): if alone: H.append("") return "".join(H) + + +""" +HTML <-> text conversions. +http://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python +""" + + +class _HTMLToText(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self._buf = [] + self.hide_output = False + + def handle_starttag(self, tag, attrs): + if tag in ("p", "br") and not self.hide_output: + self._buf.append("\n") + elif tag in ("script", "style"): + self.hide_output = True + + def handle_startendtag(self, tag, attrs): + if tag == "br": + self._buf.append("\n") + + def handle_endtag(self, tag): + if tag == "p": + self._buf.append("\n") + elif tag in ("script", "style"): + self.hide_output = False + + def handle_data(self, text): + if text and not self.hide_output: + self._buf.append(re.sub(r"\s+", " ", text)) + + def handle_entityref(self, name): + if name in name2codepoint and not self.hide_output: + c = chr(name2codepoint[name]) + self._buf.append(c) + + def handle_charref(self, name): + if not self.hide_output: + n = int(name[1:], 16) if name.startswith("x") else int(name) + self._buf.append(chr(n)) + + def get_text(self): + return re.sub(r" +", " ", "".join(self._buf)) + + +def html_to_text(html): + """ + Given a piece of HTML, return the plain text it contains. + This handles entities and char refs, but not javascript and stylesheets. + """ + parser = _HTMLToText() + try: + parser.feed(html) + parser.close() + except: # HTMLParseError: No good replacement? + pass + return parser.get_text() diff --git a/app/scodoc/sco_archives_etud.py b/app/scodoc/sco_archives_etud.py index 1805c085f7..3a47995a82 100644 --- a/app/scodoc/sco_archives_etud.py +++ b/app/scodoc/sco_archives_etud.py @@ -30,7 +30,8 @@ les dossiers d'admission et autres pièces utiles. """ import flask -from flask import url_for, g, request +from flask import url_for, render_template +from flask import g, request from flask_login import current_user import app.scodoc.sco_utils as scu @@ -328,9 +329,9 @@ def etudarchive_import_files_form(group_id): if tf[0] == 0: return "\n".join(H) + tf[1] + "" + F - elif tf[0] == -1: - # retrouve le semestre à partir du groupe: - group = sco_groups.get_group(group_id) + # retrouve le semestre à partir du groupe: + group = sco_groups.get_group(group_id) + if tf[0] == -1: return flask.redirect( url_for( "notes.formsemestre_status", @@ -340,21 +341,41 @@ def etudarchive_import_files_form(group_id): ) else: return etudarchive_import_files( - group_id=tf[2]["group_id"], + formsemestre_id=group["formsemestre_id"], xlsfile=tf[2]["xlsfile"], zipfile=tf[2]["zipfile"], description=tf[2]["description"], ) -def etudarchive_import_files(group_id=None, xlsfile=None, zipfile=None, description=""): +def etudarchive_import_files( + formsemestre_id=None, xlsfile=None, zipfile=None, description="" +): + "Importe des fichiers" + def callback(etud, data, filename): _store_etud_file_to_new_archive(etud["etudid"], data, filename, description) - filename_title = "fichier_a_charger" - page_title = "Téléchargement de fichiers associés aux étudiants" - # Utilise la fontion au depart developpee pour les photos - r = sco_trombino.zip_excel_import_files( - xlsfile, zipfile, callback, filename_title, page_title + # Utilise la fontion developpée au depart pour les photos + ( + ignored_zipfiles, + unmatched_files, + stored_etud_filename, + ) = sco_trombino.zip_excel_import_files( + xlsfile=xlsfile, + zipfile=zipfile, + callback=callback, + filename_title="fichier_a_charger", + ) + return render_template( + "scolar/photos_import_files.html", + page_title="Téléchargement de fichiers associés aux étudiants", + ignored_zipfiles=ignored_zipfiles, + unmatched_files=unmatched_files, + stored_etud_filename=stored_etud_filename, + next_page=url_for( + "scolar.groups_view", + scodoc_dept=g.scodoc_dept, + formsemestre_id=formsemestre_id, + ), ) - return r + html_sco_header.sco_footer() diff --git a/app/scodoc/sco_trombino.py b/app/scodoc/sco_trombino.py index 7b38f6a2d3..9441438fae 100644 --- a/app/scodoc/sco_trombino.py +++ b/app/scodoc/sco_trombino.py @@ -30,6 +30,7 @@ import io from zipfile import ZipFile, BadZipfile +from flask.templating import render_template import reportlab from reportlab.lib.units import cm, mm from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY @@ -531,25 +532,33 @@ def photos_import_files_form(group_ids=[]): elif tf[0] == -1: return flask.redirect(back_url) else: - return photos_import_files( - group_ids=tf[2]["group_ids"], + + def callback(etud, data, filename): + sco_photos.store_photo(etud, data) + + ( + ignored_zipfiles, + unmatched_files, + stored_etud_filename, + ) = zip_excel_import_files( xlsfile=tf[2]["xlsfile"], zipfile=tf[2]["zipfile"], + callback=callback, + filename_title="fichier_photo", + ) + return render_template( + "scolar/photos_import_files.html", + page_title="Téléchargement des photos des étudiants", + ignored_zipfiles=ignored_zipfiles, + unmatched_files=unmatched_files, + stored_etud_filename=stored_etud_filename, + next_page=url_for( + "scolar.groups_view", + scodoc_dept=g.scodoc_dept, + formsemestre_id=groups_infos.formsemestre_id, + curtab="tab-photos", + ), ) - - -def photos_import_files(group_ids=[], xlsfile=None, zipfile=None): - """Importation des photos""" - groups_infos = sco_groups_view.DisplayedGroupsInfos(group_ids) - back_url = "groups_view?%s&curtab=tab-photos" % groups_infos.groups_query_args - filename_title = "fichier_photo" - page_title = "Téléchargement des photos des étudiants" - - def callback(etud, data, filename): - sco_photos.store_photo(etud, data) - - zip_excel_import_files(xlsfile, zipfile, callback, filename_title, page_title) - return flask.redirect(back_url + "&head_message=photos%20 importees") def zip_excel_import_files( @@ -557,19 +566,19 @@ def zip_excel_import_files( zipfile=None, callback=None, filename_title="", # doit obligatoirement etre specifié - page_title="", ): """Importation de fichiers à partir d'un excel et d'un zip La fonction callback() - est appelé pour chaque fichier trouvé. + est appelée pour chaque fichier trouvé. + Fonction utilisée pour les photos et les fichiers étudiants (archives). """ # 1- build mapping etudid -> filename exceldata = xlsfile.read() if not exceldata: raise ScoValueError("Fichier excel vide ou invalide") _, data = sco_excel.excel_bytes_to_list(exceldata) - if not data: # probably a bug + if not data: raise ScoValueError("Fichier excel vide !") # on doit avoir une colonne etudid et une colonne filename_title ('fichier_photo') titles = data[0] @@ -591,30 +600,30 @@ def zip_excel_import_files( fn = fn.split("/")[-1] # use only last component, not directories return fn - Filename2Etud = {} # filename : etudid + filename_to_etud = {} # filename : etudid for l in data[1:]: filename = l[filename_idx].strip() if filename: - Filename2Etud[normfilename(filename)] = l[etudid_idx] + filename_to_etud[normfilename(filename)] = l[etudid_idx] # 2- Ouvre le zip et try: z = ZipFile(zipfile) except BadZipfile: - raise ScoValueError("Fichier ZIP incorrect !") + raise ScoValueError("Fichier ZIP incorrect !") from BadZipfile ignored_zipfiles = [] - stored = [] # [ (etud, filename) ] + stored_etud_filename = [] # [ (etud, filename) ] for name in z.namelist(): if len(name) > 4 and name[-1] != "/" and "." in name: data = z.read(name) # match zip filename with name given in excel normname = normfilename(name) - if normname in Filename2Etud: - etudid = Filename2Etud[normname] + if normname in filename_to_etud: + etudid = filename_to_etud[normname] # ok, store photo try: etud = sco_etud.get_etud_info(etudid=etudid, filled=True)[0] - del Filename2Etud[normname] + del filename_to_etud[normname] except: raise ScoValueError("ID étudiant invalide: %s" % etudid) @@ -624,7 +633,7 @@ def zip_excel_import_files( normfilename(name, lowercase=False), ) - stored.append((etud, name)) + stored_etud_filename.append((etud, name)) else: log("zip: zip name %s not in excel !" % name) ignored_zipfiles.append(name) @@ -632,35 +641,9 @@ def zip_excel_import_files( if name[-1] != "/": ignored_zipfiles.append(name) log("zip: ignoring %s" % name) - if Filename2Etud: + if filename_to_etud: # lignes excel non traitées - unmatched_files = list(Filename2Etud.keys()) + unmatched_files = list(filename_to_etud.keys()) else: unmatched_files = [] - # 3- Result page - H = [ - _trombino_html_header(), - """

%s

-

Opération effectuée

- """ - % page_title, - ] - if ignored_zipfiles: - H.append("

Fichiers ignorés dans le zip:

") - if unmatched_files: - H.append( - "

Fichiers indiqués dans feuille mais non trouvés dans le zip:

") - if stored: - H.append("

Fichiers chargés:

") - - return "\n".join(H) + return ignored_zipfiles, unmatched_files, stored_etud_filename diff --git a/app/templates/scolar/photos_import_files.html b/app/templates/scolar/photos_import_files.html new file mode 100644 index 0000000000..f4bae574aa --- /dev/null +++ b/app/templates/scolar/photos_import_files.html @@ -0,0 +1,39 @@ +{% extends 'base.html' %} + +{% block app_content %} + +

{{ page_title }}

+

Opération effectuée

+ +{% if ignored_zipfiles %} +

Fichiers ignorés dans le zip:

+ +{% endif %} + +{% if unmatched_files %} +

Fichiers indiqués dans la feuille mais non trouvés dans le zip:

+ +{% endif %} + +{% if stored_etud_filename %} +

Fichiers chargés:

+ +{% endif %} + +
+

Continuer +

+ +{% endblock %} \ No newline at end of file diff --git a/app/templates/scolar/photos_import_files.txt b/app/templates/scolar/photos_import_files.txt new file mode 100755 index 0000000000..d9aab53ee7 --- /dev/null +++ b/app/templates/scolar/photos_import_files.txt @@ -0,0 +1,23 @@ + +Importation des photo effectuée + +{% if ignored_zipfiles %} +# Fichiers ignorés dans le zip: + {% for name in ignored_zipfiles %} + - {{name}} + {% endfor %} +{% endif %} + +{% if unmatched_files %} +# Fichiers indiqués dans la feuille mais non trouvés dans le zip: + {% for name in unmatched_files %} + - {{name}} + {% endfor %} +{% endif %} + +{% if stored_etud_filename %} +# Fichiers chargés: + {% for (etud, name) in stored_etud_filename %} + - {{etud["nomprenom"]}}: {{name}} + {% endfor %} +{% endif %} diff --git a/scodoc.py b/scodoc.py index 818867989e..3785dc6ea2 100755 --- a/scodoc.py +++ b/scodoc.py @@ -13,6 +13,7 @@ import sys import click import flask from flask.cli import with_appcontext +from flask.templating import render_template from app import create_app, cli, db from app import initialize_scodoc_database @@ -323,6 +324,50 @@ def migrate_scodoc7_dept_archive(dept: str): # migrate-scodoc7-dept-archive tools.migrate_scodoc7_dept_archive(dept) +@app.cli.command() +@click.argument("formsemestre_id", type=click.INT) +@click.argument("xlsfile", type=click.File("rb")) +@click.argument("zipfile", type=click.File("rb")) +def photos_import_files(formsemestre_id: int, xlsfile: str, zipfile: str): + import app as mapp + from app.scodoc import sco_trombino, sco_photos + from app.scodoc import notesdb as ndb + from flask_login import login_user + from app.auth.models import get_super_admin + + sem = mapp.models.formsemestre.FormSemestre.query.get(formsemestre_id) + if not sem: + sys.stderr.write("photos-import-files: numéro de semestre invalide\n") + return 2 + + with app.test_request_context(): + mapp.set_sco_dept(sem.departement.acronym) + admin_user = get_super_admin() + login_user(admin_user) + + def callback(etud, data, filename): + sco_photos.store_photo(etud, data) + + ( + ignored_zipfiles, + unmatched_files, + stored_etud_filename, + ) = sco_trombino.zip_excel_import_files( + xlsfile=xlsfile, + zipfile=zipfile, + callback=callback, + filename_title="fichier_photo", + ) + print( + render_template( + "scolar/photos_import_files.txt", + ignored_zipfiles=ignored_zipfiles, + unmatched_files=unmatched_files, + stored_etud_filename=stored_etud_filename, + ) + ) + + @app.cli.command() @with_appcontext def clear_cache(): # clear-cache