ScoDoc/app/scodoc/sco_apogee_compare.py

377 lines
13 KiB
Python
Raw Normal View History

2020-09-26 16:19:37 +02:00
# -*- mode: python -*-
# -*- coding: utf-8 -*-
##############################################################################
#
# Gestion scolarite IUT
#
2023-12-31 23:04:06 +01:00
# Copyright (c) 1999 - 2024 Emmanuel Viennet. All rights reserved.
2020-09-26 16:19:37 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################
"""Comparison of two Apogée files (maquettes).

1) Check:
    etape_apogee, vdi_apogee, cod_dip_apogee, annee_scolaire
    structure: col_ids (the comparison covers the columns common to both files)

2) Compare the student lists:
    present in A but not in B
    present in B but not in A
    number in common

3) Compare the results:
    for each common student:
        for each common column:
            compare the results
"""
import html

from flask import g, url_for

from app import log
from app.scodoc import sco_apogee_csv, sco_apogee_reader
from app.scodoc.sco_apogee_csv import ApoData
from app.scodoc.gen_tables import GenTable
from app.scodoc.sco_exceptions import ScoValueError
from app.scodoc import html_sco_header
from app.scodoc import sco_preferences
2020-09-26 16:19:37 +02:00
# HTML help banner inserted near the top of both the upload form and the
# comparison report.
_HELP_TXT = """
<div class="help">
<p>Outil de comparaison de fichiers (maquettes CSV) Apogée.
</p>
<p>Cet outil compare deux fichiers fournis. Aucune donnée stockée dans ScoDoc n'est utilisée.
</p>
</div>
"""
def apo_compare_csv_form():
    """Return the HTML page holding the upload form for two Apogée CSV files.

    The form posts the two files (fields ``file_a`` and ``file_b``) and the
    ``autodetect`` checkbox to ``apo_compare_csv``.
    """
    page_top = html_sco_header.sco_header(page_title="Comparaison de fichiers Apogée")
    form_open = """<h2>Comparaison de fichiers Apogée</h2>
<form id="apo_csv_add" action="apo_compare_csv" method="post" enctype="multipart/form-data">
"""
    form_fields = """
<div class="apo_compare_csv_form_but">
Fichier Apogée A:
<input type="file" size="30" name="file_a"/>
</div>
<div class="apo_compare_csv_form_but">
Fichier Apogée B:
<input type="file" size="30" name="file_b"/>
</div>
<input type="checkbox" name="autodetect" checked/>autodétecter encodage</input>
<div class="apo_compare_csv_form_submit">
<input type="submit" value="Comparer ces fichiers"/>
</div>
</form>"""
    page_bottom = html_sco_header.sco_footer()
    # The help banner sits between the opening <form> tag and the fields.
    return "\n".join((page_top, form_open, _HELP_TXT, form_fields, page_bottom))
def apo_compare_csv(file_a, file_b, autodetect=True):
    """Build the HTML page reporting the differences between two Apogée CSV files.

    Args:
        file_a, file_b: the two uploaded file objects, passed as-is to
            ``_load_apo_data``.
        autodetect: forwarded to ``_load_apo_data``; it also selects which
            error message is shown when decoding fails.

    Raises:
        ScoValueError: if loading either file raises a Unicode
            decode/encode error. The exception carries the URL of the
            ``notes.semset_page`` view as ``dest_url``.
    """
    try:
        apo_data_a, apo_data_b = (
            _load_apo_data(csvfile, autodetect=autodetect)
            for csvfile in (file_a, file_b)
        )
    except (UnicodeDecodeError, UnicodeEncodeError) as exc:
        dest_url = url_for("notes.semset_page", scodoc_dept=g.scodoc_dept)
        if autodetect:
            message = """
Erreur: l'encodage de l'un des fichiers est mal détecté.
Essayez sans auto-détection, ou vérifiez le codage et le contenu
des fichiers.
"""
        else:
            message = f"""
Erreur: l'encodage de l'un des fichiers est incorrect.
Vérifiez qu'il est bien en {sco_apogee_reader.APO_INPUT_ENCODING}
"""
        raise ScoValueError(message, dest_url=dest_url) from exc

    page_top = html_sco_header.sco_header(page_title="Comparaison de fichiers Apogée")
    report = _apo_compare_csv(apo_data_a, apo_data_b)
    page_bottom = html_sco_header.sco_footer()
    return "\n".join(
        (
            page_top,
            "<h2>Comparaison de fichiers Apogée</h2>",
            _HELP_TXT,
            '<div class="apo_compare_csv">',
            report,
            "</div>",
            """<p><a href="apo_compare_csv_form" class="stdlink">Autre comparaison</a></p>""",
            page_bottom,
        )
    )
def _load_apo_data(csvfile, autodetect=True):
    """Read an uploaded Apogée CSV file and wrap its content in an ApoData.

    Args:
        csvfile: file object from the request; its ``read()`` result is
            decoded and its ``filename`` is recorded as ``orig_filename``.
        autodetect: when true, the raw bytes first go through
            ``sco_apogee_reader.fix_data_encoding`` and any message it
            returns is logged.

    Raises:
        ScoValueError: if there is no data to decode.
    """
    raw = csvfile.read()
    if autodetect:
        raw, message = sco_apogee_reader.fix_data_encoding(raw)
        if message:
            log(f"apo_compare_csv: {message}")
    if not raw:
        raise ScoValueError("fichier vide ? (apo_compare_csv: no data)")
    text = raw.decode(sco_apogee_reader.APO_INPUT_ENCODING)
    return sco_apogee_csv.ApoData(text, orig_filename=csvfile.filename)
def _header_field_html(label, val_a, val_b):
    """Return the HTML lines for one header field of the report.

    A single value is shown when A and B agree, otherwise ``A != B`` is shown
    and flagged as a difference. Values are HTML-escaped; ``label`` is
    inserted as-is (callers pass literal text).
    """
    if val_a != val_b:
        content = (
            f"""<span class="val_dif">{html.escape(str(val_a))}"""
            f""" != {html.escape(str(val_b))}</span>"""
        )
    else:
        content = f"""<span class="val_ok">{html.escape(str(val_a))}</span>"""
    return [f'<div><span class="key">{label}</span>', content, "</div>"]


def _apo_compare_csv(apo_a: "ApoData", apo_b: "ApoData"):
    """Generate the HTML report comparing A and B, two ApoData instances
    representing Apogée CSV maquettes.

    The report has three sections: header fields and Apogée elements,
    student lists, and (only when at least one student is in both files)
    the table of result differences.

    Every value taken from the two files (file names, header codes, element
    codes) is HTML-escaped before insertion, since the files are uploaded by
    the user.

    Returns:
        str: the HTML fragment, lines joined with newlines.
    """
    lines = []

    # 1-- File names, étape and codes
    lines.append('<div class="section"><div class="tit">En-tête</div>')
    for label, apo in (("Nom fichier A:", apo_a), ("Nom fichier B:", apo_b)):
        lines.append(f'<div><span class="key">{label}</span><span class="val_ok">')
        # The file name may be missing: show an empty name rather than
        # breaking the final join.
        lines.append(html.escape(str(apo.orig_filename or "")))
        lines.append("</span></div>")
    for label, val_a, val_b in (
        ("Étape Apogée:", apo_a.etape_apogee, apo_b.etape_apogee),
        ("VDI Apogée:", apo_a.vdi_apogee, apo_b.vdi_apogee),
        ("Code diplôme :", apo_a.cod_dip_apogee, apo_b.cod_dip_apogee),
        ("Année scolaire :", apo_a.annee_scolaire, apo_b.annee_scolaire),
    ):
        lines.extend(_header_field_html(label, val_a, val_b))

    # Apogée elements (columns)
    a_elts = set(apo_a.apo_csv.apo_elts.keys())
    b_elts = set(apo_b.apo_csv.apo_elts.keys())
    lines.append('<div><span class="key">Éléments Apogée :</span>')
    if a_elts == b_elts:
        lines.append(f"""<span class="val_ok">{len(a_elts)}</span>""")
    else:
        elts_only_a = a_elts - b_elts
        elts_only_b = b_elts - a_elts
        lines.append(
            '<span class="val_dif">différents (%d en commun, %d seulement dans A, %d seulement dans B)</span>'
            % (len(a_elts & b_elts), len(elts_only_a), len(elts_only_b))
        )
        for side, only in (("A", elts_only_a), ("B", elts_only_b)):
            if only:
                codes = html.escape(", ".join(sorted(only)))
                lines.append(
                    f'<div><span class="key">Éléments seulement dans {side} : </span>'
                    f'<span class="val_dif">{codes}</span></div>'
                )
    lines.append("</div>")
    lines.append("</div>")  # /section

    # 2-- Student lists
    lines.append('<div class="section"><div class="tit">Étudiants</div>')
    a_nips = set(apo_a.etud_by_nip)
    b_nips = set(apo_b.etud_by_nip)
    nb_etuds_communs = len(a_nips & b_nips)
    nb_etuds_dif = len(a_nips ^ b_nips)  # present in exactly one file
    lines.append("""<div><span class="key">Liste d'étudiants :</span>""")
    if a_nips == b_nips:
        lines.append(
            f"""<span class="s_ok">
{len(a_nips)} étudiants (tous présents dans chaque fichier)</span>
"""
        )
    else:
        lines.append(
            f"""<span class="val_dif">différents ({nb_etuds_communs} en commun, {nb_etuds_dif} différents)</span>"""
        )
    lines.append("</div>")
    lines.append("</div>")  # /section

    # 3-- Results of each student present in both files
    if nb_etuds_communs > 0:
        lines.append(
            """<div class="section sec_table">
<div class="tit">Différences de résultats des étudiants présents dans les deux fichiers
</div>
<p>
"""
        )
        table = apo_table_compare_etud_results(apo_a, apo_b)
        if table.get_nb_rows() > 0:
            lines.append(table.html())
        else:
            lines.append(
                f"""<p class="p_ok">aucune différence de résultats
sur les {nb_etuds_communs} étudiants communs
(<em>les éléments Apogée n'apparaissant pas dans les deux
fichiers sont omis</em>)
</p>
"""
            )
        lines.append("</div>")  # /section

    return "\n".join(lines)
def apo_table_compare_etud_results(A, B):
    """Build the table of result differences between A and B.

    Rows come from ``compare_etuds_res(A, B)``. The titles of the last two
    columns carry the original file names of A and B (empty when unset).

    Returns:
        GenTable: the table with id ``apo_table_compare_etud_results``.
    """
    rows = compare_etuds_res(A, B)
    # Insertion order of this dict is also the column order of the table.
    column_titles = {
        "nip": "NIP",
        "nom": "Nom",
        "prenom": "Prénom",
        "elt_code": "Element",
        "type_res": "Type",
        "val_A": f"A: {A.orig_filename or ''}",
        "val_B": f"B: {B.orig_filename or ''}",
    }
    return GenTable(
        rows=rows,
        titles=column_titles,
        columns_ids=tuple(column_titles),
        html_class="table_leftalign",
        html_with_td_classes=True,
        preferences=sco_preferences.SemPreferences(),
        table_id="apo_table_compare_etud_results",
    )
def _build_etud_res(e, apo_data):
    """Collect the results of student ``e`` for every Apogée element.

    Args:
        e: student record; its ``cols`` mapping is indexed by column id.
        apo_data: source of ``apo_csv.apo_elts``, whose elements list their
            columns in ``cols``.

    Returns:
        dict: ``{elt_code: {type_res: value}}``.

    Raises:
        ScoValueError: if a column of an element lacks one of the keys
            ``"apoL_a01_code"`` or ``"Type Rés."``.
    """
    elements = apo_data.apo_csv.apo_elts
    results = {}
    for code in elements:
        try:
            # (column id, result type) for each column of this element
            id_type_pairs = [
                (col["apoL_a01_code"], col["Type Rés."]) for col in elements[code].cols
            ]
        except KeyError as exc:
            raise ScoValueError(
                "Erreur: un élément sans 'Type Rés.'. Vérifiez l'encodage de vos fichiers."
            ) from exc
        results[code] = {
            type_res: e.cols[col_id] for col_id, type_res in id_type_pairs
        }
    return results
def compare_etud_res(r_A, r_B, remove_missing=True):
    """List the result values that differ between two result sets of a student.

    Args:
        r_A, r_B: mappings ``{elt_code: {type_res: value}}``.
        remove_missing: if true, an element or a result type that is not
            present on both sides is ignored; otherwise the missing value
            is reported as ``None``.

    Returns:
        list[dict]: one dict per difference, with keys ``elt_code``,
        ``type_res``, ``val_A`` and ``val_B``, ordered by element code.
        For the ``"N"`` type (notes), both values are parsed as numbers
        (decimal comma accepted) and the parsed values are compared and
        reported; if either cannot be parsed, the raw values are used.
    """
    diffs = []
    # Sorted so that the output order does not depend on set iteration order.
    for elt_code in sorted(set(r_A) | set(r_B)):
        res_a = r_A.get(elt_code)
        res_b = r_B.get(elt_code)
        if remove_missing and (res_a is None or res_b is None):
            continue
        res_a = {} if res_a is None else res_a
        res_b = {} if res_b is None else res_b
        # Result types of A first, then those only B has.
        type_codes = list(res_a) + [t for t in res_b if t not in res_a]
        for type_res in type_codes:
            if remove_missing and (type_res not in res_a or type_res not in res_b):
                continue
            val_A = res_a.get(type_res)  # None: absent on this side
            val_B = res_b.get(type_res)
            if type_res == "N" and val_A is not None and val_B is not None:
                # Notes are compared as numbers, so "12,5" equals "12.50".
                try:
                    val_A, val_B = (
                        float(val_A.replace(",", ".")),
                        float(val_B.replace(",", ".")),
                    )
                except (ValueError, AttributeError):
                    pass  # not both numeric: compare the raw values
            if val_A != val_B:
                diffs.append(
                    {
                        "elt_code": elt_code,
                        "type_res": type_res,
                        "val_A": val_A,
                        "val_B": val_B,
                    }
                )
    return diffs
def compare_etuds_res(A, B):
    """List the result differences for every student present in both A and B.

    Returns:
        list[dict]: one dict per difference, with keys ``nip``, ``nom``,
        ``prenom`` (taken from the student's record in A), ``elt_code``,
        ``type_res``, ``val_A`` and ``val_B``.
    """
    rows = []
    for nip in set(A.etud_by_nip) & set(B.etud_by_nip):
        etud_a, etud_b = A.etud_by_nip[nip], B.etud_by_nip[nip]
        res_a = _build_etud_res(etud_a, A)
        res_b = _build_etud_res(etud_b, B)
        for diff in compare_etud_res(res_a, res_b):
            # Tag each difference with the student's identity.
            diff.update(
                nip=etud_a["nip"], nom=etud_a["nom"], prenom=etud_a["prenom"]
            )
            rows.append(diff)
    return rows