# -*- mode: python -*-
# -*- coding: utf-8 -*-

"""Enrollment matrices (students x modules) for a semester."""
import numpy as np
import pandas as pd

from app import db
from app import models

#
# Loading the enrollments is slow: the matrix is nb_modules x nb_students.
# On a debug data set (116 students, 18 modules) it takes around 250 ms.
# Three approaches were tried below; the first one was retained.
#
def df_load_modimpl_inscr(formsemestre_id):
    """Load the module enrollment matrix of a semester.

    Args:
        formsemestre_id: id of the FormSemestre to load.

    Returns:
        A DataFrame indexed by etudid with one column per moduleimpl of the
        semester. Values are ints: 1 if the student is enrolled in the
        module, 0 otherwise.
    """
    # Least slow method: one query per module, then merge the dataframes.
    sem = models.FormSemestre.query.get(formsemestre_id)
    moduleimpl_ids = [m.id for m in sem.modimpls]
    etudids = [i.etudid for i in sem.inscriptions]
    df = pd.DataFrame(index=etudids, dtype=int)
    for moduleimpl_id in moduleimpl_ids:
        # NOTE(review): the column label comes from the SQL alias, so it is
        # presumably the id rendered as a string, whereas the _v0/_v2
        # variants use the ids themselves as labels -- confirm with callers.
        ins_df = pd.read_sql_query(
            """SELECT etudid, 1 AS "%(moduleimpl_id)s"
            FROM notes_moduleimpl_inscription
            WHERE moduleimpl_id=%(moduleimpl_id)s""",
            db.engine,
            params={"moduleimpl_id": moduleimpl_id},
            index_col="etudid",
            dtype=int,
        )
        # NOTE(review): an outer join also keeps students found in the
        # module table but absent from the semester enrollments -- confirm
        # this is wanted.
        df = df.merge(ins_df, how="outer", left_index=True, right_index=True)
    # The join leaves NaN (hence float columns) for students who are not
    # enrolled in a module; turn those into the documented 0/1 ints.
    return df.fillna(0).astype(int)  # 100 runs (before fillna): 25.5s 15s 17s


# Timing with timeit:
# timeit.timeit('x = df_load_modimpl_inscr_v0(696)', number=100, globals=globals())


def _mark_enrolled(df, etudid, moduleimpl_id):
    """Set df[etudid, moduleimpl_id] to True, ignoring unknown rows/columns.

    A single .at write is used instead of chained indexing, which does not
    update the parent DataFrame when pandas Copy-on-Write is active.
    """
    if etudid in df.index and moduleimpl_id in df.columns:
        df.at[etudid, moduleimpl_id] = True


def df_load_modimpl_inscr_v0(formsemestre_id):
    """Load the enrollment matrix through the ORM relationships only.

    Method 0, pure SQLAlchemy: 1.5 to 2 times slower than
    df_load_modimpl_inscr.

    Args:
        formsemestre_id: id of the FormSemestre to load.

    Returns:
        A boolean DataFrame indexed by etudid, with one column per
        moduleimpl id; True where the student is enrolled in the module.
    """
    sem = models.FormSemestre.query.get(formsemestre_id)
    moduleimpl_ids = [m.id for m in sem.modimpls]
    etudids = [i.etudid for i in sem.inscriptions]
    df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
    for modimpl in sem.modimpls:
        for inscr in modimpl.inscriptions:
            _mark_enrolled(df, inscr.etudid, modimpl.id)
    return df  # 100 runs: 30.7s 46s 32s


def df_load_modimpl_inscr_v2(formsemestre_id):
    """Load the enrollment matrix with a single SQL join.

    Args:
        formsemestre_id: id of the FormSemestre to load.

    Returns:
        A boolean DataFrame indexed by etudid, with one column per
        moduleimpl id; True where the student is enrolled in the module.
    """
    sem = models.FormSemestre.query.get(formsemestre_id)
    moduleimpl_ids = [m.id for m in sem.modimpls]
    etudids = [i.etudid for i in sem.inscriptions]
    df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
    # NOTE(review): Engine.execute() is not available in SQLAlchemy 2.x --
    # confirm the SQLAlchemy version this project targets.
    cursor = db.engine.execute(
        "select moduleimpl_id, etudid from notes_moduleimpl_inscription i, notes_moduleimpl m where i.moduleimpl_id = m.id and m.formsemestre_id = %(formsemestre_id)s",
        {"formsemestre_id": formsemestre_id},
    )
    for moduleimpl_id, etudid in cursor:
        _mark_enrolled(df, etudid, moduleimpl_id)
    return df  # 100 runs: 44s, 31s, 29s, 28s