diff --git a/app/scodoc/htmlutils.py b/app/scodoc/htmlutils.py index 68e835c3..c06e86e9 100644 --- a/app/scodoc/htmlutils.py +++ b/app/scodoc/htmlutils.py @@ -27,9 +27,12 @@ """Various HTML generation functions """ +from html.parser import HTMLParser +from html.entities import name2codepoint +import re + from flask import g, url_for -import app.scodoc.sco_utils as scu from . import listhistogram @@ -130,3 +133,63 @@ def make_menu(title, items, css_class="", alone=False): if alone: H.append("") return "".join(H) + + +""" +HTML <-> text conversions. +http://stackoverflow.com/questions/328356/extracting-text-from-html-file-using-python +""" + + +class _HTMLToText(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self._buf = [] + self.hide_output = False + + def handle_starttag(self, tag, attrs): + if tag in ("p", "br") and not self.hide_output: + self._buf.append("\n") + elif tag in ("script", "style"): + self.hide_output = True + + def handle_startendtag(self, tag, attrs): + if tag == "br": + self._buf.append("\n") + + def handle_endtag(self, tag): + if tag == "p": + self._buf.append("\n") + elif tag in ("script", "style"): + self.hide_output = False + + def handle_data(self, text): + if text and not self.hide_output: + self._buf.append(re.sub(r"\s+", " ", text)) + + def handle_entityref(self, name): + if name in name2codepoint and not self.hide_output: + c = chr(name2codepoint[name]) + self._buf.append(c) + + def handle_charref(self, name): + if not self.hide_output: + n = int(name[1:], 16) if name.startswith("x") else int(name) + self._buf.append(chr(n)) + + def get_text(self): + return re.sub(r" +", " ", "".join(self._buf)) + + +def html_to_text(html): + """ + Given a piece of HTML, return the plain text it contains. + This handles entities and char refs, but not javascript and stylesheets. + """ + parser = _HTMLToText() + try: + parser.feed(html) + parser.close() + except: # HTMLParseError: No good replacement? + pass + return parser.get_text() diff --git a/app/scodoc/sco_archives_etud.py b/app/scodoc/sco_archives_etud.py index 1805c085..3a47995a 100644 --- a/app/scodoc/sco_archives_etud.py +++ b/app/scodoc/sco_archives_etud.py @@ -30,7 +30,8 @@ les dossiers d'admission et autres pièces utiles. """ import flask -from flask import url_for, g, request +from flask import url_for, render_template +from flask import g, request from flask_login import current_user import app.scodoc.sco_utils as scu @@ -328,9 +329,9 @@ def etudarchive_import_files_form(group_id): if tf[0] == 0: return "\n".join(H) + tf[1] + "" + F - elif tf[0] == -1: - # retrouve le semestre à partir du groupe: - group = sco_groups.get_group(group_id) + # retrouve le semestre à partir du groupe: + group = sco_groups.get_group(group_id) + if tf[0] == -1: return flask.redirect( url_for( "notes.formsemestre_status", @@ -340,21 +341,41 @@ def etudarchive_import_files_form(group_id): ) else: return etudarchive_import_files( - group_id=tf[2]["group_id"], + formsemestre_id=group["formsemestre_id"], xlsfile=tf[2]["xlsfile"], zipfile=tf[2]["zipfile"], description=tf[2]["description"], ) -def etudarchive_import_files(group_id=None, xlsfile=None, zipfile=None, description=""): +def etudarchive_import_files( + formsemestre_id=None, xlsfile=None, zipfile=None, description="" +): + "Importe des fichiers" + def callback(etud, data, filename): _store_etud_file_to_new_archive(etud["etudid"], data, filename, description) - filename_title = "fichier_a_charger" - page_title = "Téléchargement de fichiers associés aux étudiants" - # Utilise la fontion au depart developpee pour les photos - r = sco_trombino.zip_excel_import_files( - xlsfile, zipfile, callback, filename_title, page_title + # Utilise la fontion developpée au depart pour les photos + ( + ignored_zipfiles, + unmatched_files, + stored_etud_filename, + ) = sco_trombino.zip_excel_import_files( + xlsfile=xlsfile, + zipfile=zipfile, + callback=callback, + filename_title="fichier_a_charger", + ) + return render_template( + "scolar/photos_import_files.html", + page_title="Téléchargement de fichiers associés aux étudiants", + ignored_zipfiles=ignored_zipfiles, + unmatched_files=unmatched_files, + stored_etud_filename=stored_etud_filename, + next_page=url_for( + "scolar.groups_view", + scodoc_dept=g.scodoc_dept, + formsemestre_id=formsemestre_id, + ), ) - return r + html_sco_header.sco_footer() diff --git a/app/scodoc/sco_trombino.py b/app/scodoc/sco_trombino.py index 7b38f6a2..9441438f 100644 --- a/app/scodoc/sco_trombino.py +++ b/app/scodoc/sco_trombino.py @@ -30,6 +30,7 @@ import io from zipfile import ZipFile, BadZipfile +from flask.templating import render_template import reportlab from reportlab.lib.units import cm, mm from reportlab.lib.enums import TA_LEFT, TA_RIGHT, TA_CENTER, TA_JUSTIFY @@ -531,25 +532,33 @@ def photos_import_files_form(group_ids=[]): elif tf[0] == -1: return flask.redirect(back_url) else: - return photos_import_files( - group_ids=tf[2]["group_ids"], + + def callback(etud, data, filename): + sco_photos.store_photo(etud, data) + + ( + ignored_zipfiles, + unmatched_files, + stored_etud_filename, + ) = zip_excel_import_files( xlsfile=tf[2]["xlsfile"], zipfile=tf[2]["zipfile"], + callback=callback, + filename_title="fichier_photo", + ) + return render_template( + "scolar/photos_import_files.html", + page_title="Téléchargement des photos des étudiants", + ignored_zipfiles=ignored_zipfiles, + unmatched_files=unmatched_files, + stored_etud_filename=stored_etud_filename, + next_page=url_for( + "scolar.groups_view", + scodoc_dept=g.scodoc_dept, + formsemestre_id=groups_infos.formsemestre_id, + curtab="tab-photos", + ), ) - - -def photos_import_files(group_ids=[], xlsfile=None, zipfile=None): - """Importation des photos""" - groups_infos = sco_groups_view.DisplayedGroupsInfos(group_ids) - back_url = "groups_view?%s&curtab=tab-photos" % groups_infos.groups_query_args - filename_title = "fichier_photo" - page_title = "Téléchargement des photos des étudiants" - - def callback(etud, data, filename): - sco_photos.store_photo(etud, data) - - zip_excel_import_files(xlsfile, zipfile, callback, filename_title, page_title) - return flask.redirect(back_url + "&head_message=photos%20 importees") def zip_excel_import_files( @@ -557,19 +566,19 @@ def zip_excel_import_files( zipfile=None, callback=None, filename_title="", # doit obligatoirement etre specifié - page_title="", ): """Importation de fichiers à partir d'un excel et d'un zip La fonction callback() - est appelé pour chaque fichier trouvé. + est appelée pour chaque fichier trouvé. + Fonction utilisée pour les photos et les fichiers étudiants (archives). """ # 1- build mapping etudid -> filename exceldata = xlsfile.read() if not exceldata: raise ScoValueError("Fichier excel vide ou invalide") _, data = sco_excel.excel_bytes_to_list(exceldata) - if not data: # probably a bug + if not data: raise ScoValueError("Fichier excel vide !") # on doit avoir une colonne etudid et une colonne filename_title ('fichier_photo') titles = data[0] @@ -591,30 +600,30 @@ def zip_excel_import_files( fn = fn.split("/")[-1] # use only last component, not directories return fn - Filename2Etud = {} # filename : etudid + filename_to_etud = {} # filename : etudid for l in data[1:]: filename = l[filename_idx].strip() if filename: - Filename2Etud[normfilename(filename)] = l[etudid_idx] + filename_to_etud[normfilename(filename)] = l[etudid_idx] # 2- Ouvre le zip et try: z = ZipFile(zipfile) except BadZipfile: - raise ScoValueError("Fichier ZIP incorrect !") + raise ScoValueError("Fichier ZIP incorrect !") from BadZipfile ignored_zipfiles = [] - stored = [] # [ (etud, filename) ] + stored_etud_filename = [] # [ (etud, filename) ] for name in z.namelist(): if len(name) > 4 and name[-1] != "/" and "." in name: data = z.read(name) # match zip filename with name given in excel normname = normfilename(name) - if normname in Filename2Etud: - etudid = Filename2Etud[normname] + if normname in filename_to_etud: + etudid = filename_to_etud[normname] # ok, store photo try: etud = sco_etud.get_etud_info(etudid=etudid, filled=True)[0] - del Filename2Etud[normname] + del filename_to_etud[normname] except: raise ScoValueError("ID étudiant invalide: %s" % etudid) @@ -624,7 +633,7 @@ def zip_excel_import_files( normfilename(name, lowercase=False), ) - stored.append((etud, name)) + stored_etud_filename.append((etud, name)) else: log("zip: zip name %s not in excel !" % name) ignored_zipfiles.append(name) @@ -632,35 +641,9 @@ def zip_excel_import_files( if name[-1] != "/": ignored_zipfiles.append(name) log("zip: ignoring %s" % name) - if Filename2Etud: + if filename_to_etud: # lignes excel non traitées - unmatched_files = list(Filename2Etud.keys()) + unmatched_files = list(filename_to_etud.keys()) else: unmatched_files = [] - # 3- Result page - H = [ - _trombino_html_header(), - """