2021-11-28 16:27:05 +01:00
# -*- mode: python -*-
# -*- coding: utf-8 -*-
""" Matrices d ' inscription aux modules d ' un semestre
"""
import pandas as pd
2023-04-04 09:57:54 +02:00
import sqlalchemy as sa
2021-11-28 16:27:05 +01:00
from app import db
#
# Le chargement des inscriptions est long: matrice nb_module x nb_etuds
# sur test debug 116 etuds, 18 modules, on est autour de 250ms.
# On a testé trois approches, ci-dessous (et retenu la 1ere)
#
2023-04-04 09:57:54 +02:00
# Per-module query: one row per student (etudid) registered in the module
# whose id is bound to the moduleimpl_id parameter, plus a constant 1.
# NOTE(review): the column alias embeds the same placeholder, presumably so the
# result column ends up named after the module id (the caller later converts
# column names to int) -- confirm against the DB driver in use.
_load_modimpl_inscr_q = sa . text (
""" SELECT etudid, 1 AS " :moduleimpl_id "
FROM notes_moduleimpl_inscription
WHERE moduleimpl_id = : moduleimpl_id """
)
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr(formsemestre) -> pd.DataFrame:
    """Load the matrix of module enrollments for a semester.

    rows: etudid (students enrolled in the semester, including DEM and DEF)
    columns: moduleimpl_id
    value: bool (enrolled in the module or not)
    """
    modimpl_ids = [modimpl.id for modimpl in formsemestre.modimpls_sorted]
    semester_etudids = [
        inscription.etudid for inscription in formsemestre.inscriptions
    ]
    matrix = pd.DataFrame(index=semester_etudids, dtype=int)
    # Least slow of the approaches tried: one query per module, each result
    # being left-merged into the matrix on the etudid index.
    with db.engine.begin() as conn:
        for modimpl_id in modimpl_ids:
            module_column = pd.read_sql_query(
                _load_modimpl_inscr_q,
                conn,
                params={"moduleimpl_id": modimpl_id},
                index_col="etudid",
                dtype=int,
            )
            matrix = matrix.merge(
                module_column, how="left", left_index=True, right_index=True
            )
    # Force column names to integers (moduleimpl ids)
    matrix.columns = pd.Index([int(name) for name in matrix.columns], dtype=int)
    # After the left merges, students not enrolled in a module hold NaN
    # (float columns): map them to 0 before the conversion to bool.
    return matrix.fillna(0).astype(bool)  # x100 25.5s 15s 17s
2021-11-28 16:27:05 +01:00
# timing with timeit (formsemestre being an already loaded semester object):
# timeit.timeit('x = df_load_modimpl_inscr_v0(formsemestre)', number=100, globals=globals())
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr_v0(formsemestre):
    """Build the module-enrollment matrix from the semester's object graph.

    "Method 0" (pure SQLAlchemy relationships), kept for comparison: it was
    measured 1.5 to 2 times slower than the one-query-per-module variant.

    Args:
        formsemestre: object exposing ``modimpls_sorted`` (items with ``id``
            and ``inscriptions``) and ``inscriptions`` (items with ``etudid``).

    Returns:
        A bool DataFrame indexed by the semester's etudids, with one column
        per moduleimpl id. Module enrollments of students who are not in the
        semester's index are ignored.
    """
    moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted]
    etudids = [i.etudid for i in formsemestre.inscriptions]
    df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
    for modimpl in formsemestre.modimpls_sorted:
        enrolled = [inscr.etudid for inscr in modimpl.inscriptions]
        # Assign the whole column at once: writing through ``df[col][row]``
        # is chained assignment and silently updates a copy when pandas
        # Copy-on-Write is active.
        df[modimpl.id] = df.index.isin(enrolled)
    # x100 30.7s 46s 32s (timings of the former element-wise implementation)
    return df
2021-12-05 20:21:51 +01:00
def df_load_modimpl_inscr_v2(formsemestre):
    """Build the module-enrollment matrix with a single SQL query.

    Alternative kept for comparison: one query fetches every
    (moduleimpl_id, etudid) pair of the semester, then the matrix is filled
    column by column.

    Args:
        formsemestre: object exposing ``id``, ``modimpls_sorted`` (items with
            ``id``) and ``inscriptions`` (items with ``etudid``).

    Returns:
        A bool DataFrame indexed by the semester's etudids, with one column
        per moduleimpl id. Pairs whose etudid is not in the index, or whose
        moduleimpl_id is not one of the columns, are ignored.
    """
    moduleimpl_ids = [m.id for m in formsemestre.modimpls_sorted]
    etudids = [i.etudid for i in formsemestre.inscriptions]
    df = pd.DataFrame(False, columns=moduleimpl_ids, index=etudids, dtype=bool)
    # Named bind parameter + sa.text on an explicit connection, like the main
    # loader: Engine.execute() with a raw string no longer exists in
    # SQLAlchemy 2.0.
    query = sa.text(
        "select moduleimpl_id, etudid"
        " from notes_moduleimpl_inscription i, notes_moduleimpl m"
        " where i.moduleimpl_id = m.id"
        " and m.formsemestre_id = :formsemestre_id"
    )
    etudids_by_module = {}
    with db.engine.begin() as connection:
        rows = connection.execute(query, {"formsemestre_id": formsemestre.id})
        for moduleimpl_id, etudid in rows:
            etudids_by_module.setdefault(moduleimpl_id, []).append(etudid)
    for moduleimpl_id in moduleimpl_ids:
        # Whole-column assignment instead of ``df[col][row] = True``, which is
        # chained assignment and updates a copy under pandas Copy-on-Write.
        df[moduleimpl_id] = df.index.isin(etudids_by_module.get(moduleimpl_id, []))
    # x100 44s, 31s, 29s, 28s (timings of the former element-wise implementation)
    return df