# -*- mode: python -*- # -*- coding: utf-8 -*- import inspect import logging import pdb import time import psycopg2 import sqlalchemy from sqlalchemy import func from flask import current_app from app import db from app.auth.models import User, get_super_admin import app from app import clear_scodoc_cache from app import models from app.models import APO_CODE_STR_LEN, SHORT_STR_LEN, GROUPNAME_STR_LEN from app.scodoc import notesdb as ndb def truncate_field(table_name, field, max_len): "renvoie une fonction de troncation" def troncator(value): "Si la chaine est trop longue pour la nouvelle base, émet un warning et tronque" if value and len(value) > max_len: logging.warning( "Chaine trop longue tronquée: %s.%s=%s", table_name, field, value ) return value[:max_len] return value return troncator # Attributs dont le nom change entre les bases ScoDoc 7 et 9: # (None indique que l'attribut est supprimé, "nouveau_nom" qu'il change de nom) ATTRIBUTES_MAPPING = { "admissions": { "debouche": None, }, "adresse": { "entreprise_id": None, }, "etud_annotations": { "zope_authenticated_user": "author", "zope_remote_addr": None, }, "identite": { "foto": None, }, "notes_formsemestre": { "etape_apo2": None, # => suppressed "etape_apo3": None, "etape_apo4": None, # préférences, plus dans formsemestre: # (inutilisés depuis ScoDoc 6 environ) "bul_show_decision": None, "bul_show_uevalid": None, "nomgroupetd": None, "nomgroupetp": None, "nomgroupeta": None, "gestion_absence": None, "bul_show_codemodules": None, "bul_show_rangs": None, "bul_show_ue_rangs": None, "bul_show_mod_rangs": None, }, "partition": { "compute_ranks": None, }, "notes_appreciations": { "zope_authenticated_user": "author", "zope_remote_addr": None, }, "scolog": { "remote_addr": None, "remote_host": None, }, } # Attributs à transformer pour passer de ScoDoc 7 à 9 # la fonction est appliquée au nouvel attribut ATTRIBUTES_TRANSFORM = { "notes_formsemestre": { # la modalité CP est devenue CPRO "modalite": lambda x: x if x != "CP" else "CPRO", "bul_bgcolor": truncate_field( "notes_formsemestre", "bul_bgcolor", SHORT_STR_LEN ), }, # tronque les codes trop longs pour être honnêtes... "notes_formations": { "formation_code": truncate_field( "notes_formations", "formation_code", SHORT_STR_LEN ), "code_specialite": truncate_field( "notes_formations", "code_specialite", SHORT_STR_LEN ), }, "notes_ue": { "ue_code": truncate_field("notes_ue", "ue_code", SHORT_STR_LEN), "code_apogee": truncate_field("notes_ue", "code_apogee", APO_CODE_STR_LEN), }, "notes_modules": { "code_apogee": truncate_field("notes_modules", "code_apogee", APO_CODE_STR_LEN), }, "notes_formsemestre_etapes": { "etape_apo": truncate_field( "notes_formsemestre_etapes", "etape_apo", APO_CODE_STR_LEN ), }, "notes_form_modalites": { "modalite": truncate_field("notes_form_modalites", "modalite", SHORT_STR_LEN), }, "notes_formsemestre_inscription": { "etape": truncate_field( "notes_formsemestre_inscription", "etape", APO_CODE_STR_LEN ), }, "partition": { "partition_name": truncate_field("partition", "partition_name", SHORT_STR_LEN), }, "group_descr": { "group_name": truncate_field("group_descr", "group_name", GROUPNAME_STR_LEN), }, "scolar_autorisation_inscription": { "formation_code": truncate_field( "scolar_autorisation_inscription", "formation_code", SHORT_STR_LEN ), }, } def setup_log(dept_acronym: str): """log to console (stderr) and /opt/scodoc-data/log/migration79.log""" log_formatter = logging.Formatter( "%(asctime)s %(levelname)s (" + dept_acronym + ") %(message)s" ) # Log to file: logger = logging.getLogger() file_handler = logging.FileHandler("/opt/scodoc-data/log/migration79.log") file_handler.setFormatter(log_formatter) logger.addHandler(file_handler) # Log to stderr: console_handler = logging.StreamHandler() # stderr console_handler.setFormatter(log_formatter) logger.addHandler(console_handler) # Caution: logger.setLevel(logging.DEBUG) def import_scodoc7_dept(dept_id: str, dept_db_uri=None): """Importe un département ScoDoc7 dans ScoDoc >= 8.1 (base de donnée unique) Args: dept_id: acronyme du département ("RT") dept_db_uri: URI de la base ScoDoc7eg "postgresql:///SCORT" si None, utilise postgresql:///SCO{dept_id} """ dept = models.Departement.query.filter_by(acronym=dept_id).first() if dept: raise ValueError(f"le département {dept_id} existe déjà !") if dept_db_uri is None: dept_db_uri = f"postgresql:///SCO{dept_id}" setup_log(dept_id) logging.info(f"connecting to database {dept_db_uri}") cnx = psycopg2.connect(dept_db_uri) cursor = cnx.cursor(cursor_factory=ndb.ScoDocCursor) # FIX : des dates aberrantes (dans le futur) peuvent tenir en SQL mais pas en Python cursor.execute( """UPDATE scolar_events SET event_date='2021-09-30' WHERE event_date > '2200-01-01'""" ) cnx.commit() # Create dept: dept = models.Departement(acronym=dept_id, description="migré de ScoDoc7") db.session.add(dept) db.session.commit() # id_from_scodoc7 = {} # { scodoc7id (str) : scodoc8 id (int)} # Utilisateur de rattachement par défaut: default_user = get_super_admin() # t0 = time.time() for table, id_name in SCO7_TABLES_ORDONNEES: logging.info(f"{dept.acronym}: converting {table}...") klass = get_class_for_table(table) t1 = time.time() n = convert_table(dept, cursor, id_from_scodoc7, klass, id_name, default_user) logging.info(f" inserted {n} objects in {time.time()-t1:3.2f}s.") logging.info(f"All table imported in {time.time()-t0:3.2f}s") logging.info(f"clearing app caches...") clear_scodoc_cache() logging.info(f"Done.") logging.warning(f"Un redémarrage du serveur postgresql est conseillé.") def get_class_for_table(table): """Return ScoDoc orm class for the given SQL table: search in our models""" for name in dir(models): item = getattr(models, name) if inspect.isclass(item): if issubclass(item, db.Model): if item.__tablename__ == table: return item try: # pour les db.Table qui ne sont pas des classes (isclass est faux !) if item.name == table: return item except: pass raise ValueError(f"No model for table {table}") def get_boolean_columns(klass): "return list of names of boolean attributes in this (ScoDoc 9) model" boolean_columns = [] column_names = sqlalchemy.inspect(klass).columns.keys() for column_name in column_names: column = getattr(klass, column_name) if isinstance(column.expression.type, sqlalchemy.sql.sqltypes.Boolean): boolean_columns.append(column_name) return boolean_columns def get_table_max_id(klass): "return max id in this Table (or -1 if no id)" if not id in sqlalchemy.inspect(klass).columns.keys(): return -1 sql_table = str(klass.description) cnx = db.engine.connect() r = cnx.execute("SELECT max(id) FROM " + sql_table) r.fetchone() if r: return r[0] else: # empty table return 0 def update_table_sequence(table_name): """After filling the table, we need to update the serial so that the next insertions will use new ids """ with db.engine.connect() as cnx: cnx.execute( f"""SELECT setval('{table_name}_id_seq', (SELECT MAX(id) FROM {table_name})) """ ) def convert_table( dept, cursor, id_from_scodoc7: dict, klass=None, id_name=None, default_user=None ): "converti les élements d'une table scodoc7" # Est-ce une Table ou un Model dans l'ORM ? if isinstance(klass, sqlalchemy.sql.schema.Table): is_table = True current_id = get_table_max_id(klass) has_id = current_id != -1 table_name = str(klass.description) boolean_columns = [] else: is_table = False has_id = True table_name = klass.__tablename__ # Colonnes booléennes (valeurs à convertir depuis int) boolean_columns = get_boolean_columns(klass) # Part de l'id le plus haut actuellement présent # (évidemment, nous sommes les seuls connectés à la base destination !) current_id = db.session.query(func.max(klass.id)).first() if (current_id is None) or (current_id[0] is None): current_id = 0 else: current_id = current_id[0] cnx = db.engine.connect() # mapping: login (scodoc7) : user id (scodoc8) login2id = {u.user_name: u.id for u in User.query} # les tables ont le même nom dans les deux versions de ScoDoc: cursor.execute(f"SELECT * FROM {table_name}") objects = cursor.dictfetchall() n = 0 for obj in objects: current_id += 1 convert_object( current_id, dept, obj, has_id, id_from_scodoc7, klass, is_table, id_name, boolean_columns, login2id, default_user, cnx, ) # commit progressif pour ne pas consommer trop de mémoire: n += 1 if (not n % 1000) and cnx: db.session.commit() if cnx: cnx.close() db.session.commit() # écrit la table if has_id: update_table_sequence(table_name) return len(objects) def convert_object( new_id, dept, obj: dict, has_id: bool = True, id_from_scodoc7: dict = None, klass=None, is_table: bool = False, id_name=None, boolean_columns=None, login2id=None, default_user=None, cnx=None, # cnx à la base destination ): # Supprime l'id ScoDoc7 (eg "formsemestre_id") qui deviendra "id" if id_name: old_id = obj[id_name] del obj[id_name] if hasattr(klass, "scodoc7_id"): obj["scodoc7_id"] = old_id else: old_id = None # tables ScoDoc7 sans id if is_table: table_name = str(klass.description) else: table_name = klass.__tablename__ # Les champs contant des id utilisateurs: # chaine login en ScoDoc7, uid numérique en ScoDoc 8+ USER_REFS = {"responsable_id", "ens_id", "uid"} if not is_table: # Supprime les attributs obsoletes (très anciennes versions de ScoDoc): attributs = ATTRIBUTES_MAPPING.get(table_name, {}) # renomme ou supprime les attributs for k in attributs.keys() & obj.keys(): v = attributs[k] if v is not None: obj[v] = obj[k] del obj[k] # transforme les valeurs: obj[k] = transform(obj[k]) for k in ATTRIBUTES_TRANSFORM.get(table_name, {}): obj[k] = ATTRIBUTES_TRANSFORM[table_name][k](obj[k]) # map les ids (foreign keys) for k in obj: if (k.endswith("id") or k == "object") and k not in USER_REFS | { "semestre_id", "sem_id", "scodoc7_id", }: old_ref = obj[k] if old_ref is not None: if isinstance(old_ref, str): old_ref = old_ref.strip() elif k == "entreprise_id": # id numérique spécial old_ref = f"entreprises.{old_ref}" elif k == "entreprise_corresp_id": old_ref = f"entreprise_correspondant.{old_ref}" if old_ref == "NULL" or not old_ref: # buggy old entries new_ref = None elif old_ref in id_from_scodoc7: new_ref = id_from_scodoc7[old_ref] elif (not is_table) and table_name in { "scolog", "entreprise_correspondant", "entreprise_contact", "etud_annotations", "notes_notes_log", "scolar_news", "absences", "absences_notifications", "itemsuivi", # etudid n'était pas une clé "adresse", # etudid n'était pas une clé "admissions", # idem "scolar_events", }: # tables avec "fausses" clés # (l'object référencé a pu disparaitre) new_ref = None elif is_table and table_name in { "notes_semset_formsemestre", }: # pour anciennes installs où des relations n'avait pas été déclarées clés étrangères # eg: notes_semset_formsemestre.semset_id n'était pas une clé # Dans ce cas, mieux vaut supprimer la relation si l'un des objets n'existe pas return else: raise ValueError(f"no new id for {table_name}.{k}='{obj[k]}' !") obj[k] = new_ref # Remape les utilisateur: user.id # S'il n'existe pas, rattache à l'admin for k in USER_REFS & obj.keys(): login_scodoc7 = obj[k] uid = login2id.get(login_scodoc7) if not uid: uid = default_user.id warning_user_dont_exist( login_scodoc7, f"non existent user: {login_scodoc7}: giving {table_name}({old_id}) to admin", ) # raise ValueError(f"non existent user: {login_scodoc7}") obj[k] = uid # Converti les booléens for k in boolean_columns: if k in obj: obj[k] = bool(obj[k]) # Ajoute le département si besoin: if hasattr(klass, "dept_id"): obj["dept_id"] = dept.id # Fixe l'id (ainsi nous évitons d'avoir à commit() après chaque entrée) if has_id: obj["id"] = new_id if is_table: statement = sqlalchemy.insert(klass).values(**obj) _ = cnx.execute(statement) else: new_obj = klass(**obj) # ORM object db.session.add(new_obj) # insert_object(cnx, table_name, obj) # Stocke l'id pour les références (foreign keys): if id_name and has_id: if isinstance(old_id, int): # les id int étaient utilisés pour les "entreprises" old_id = table_name + "." + str(old_id) id_from_scodoc7[old_id] = new_id MISSING_USERS = set() # login ScoDoc7 référencés mais non existants... def warning_user_dont_exist(login_scodoc7, msg): if login_scodoc7 not in MISSING_USERS: return MISSING_USERS.add(login_scodoc7) logging.warning(msg) def insert_object(cnx, table_name: str, vals: dict) -> str: """insert tuple in db version manuelle => ne semble pas plus rapide """ cols = list(vals.keys()) colnames = ",".join(cols) fmt = ",".join(["%%(%s)s" % col for col in cols]) cnx.execute("insert into %s (%s) values (%s)" % (table_name, colnames, fmt), vals) # tables ordonnées topologiquement pour les clés étrangères: # g = nx.read_adjlist("misc/model-scodoc7.csv", create_using=nx.DiGraph,delimiter=";") # L = list(reversed(list(nx.topological_sort(g)))) SCO7_TABLES_ORDONNEES = [ # (table SQL, nom de l'id scodoc7) ("notes_formations", "formation_id"), ("notes_ue", "ue_id"), ("notes_matieres", "matiere_id"), ("notes_formsemestre", "formsemestre_id"), ("notes_modules", "module_id"), ("notes_moduleimpl", "moduleimpl_id"), ( "notes_modules_enseignants", "modules_enseignants_id", ), # (relation) avait un id modules_enseignants_id ("partition", "partition_id"), ("identite", "etudid"), # ("entreprises", "entreprise_id"), ("notes_evaluation", "evaluation_id"), ("group_descr", "group_id"), ("group_membership", "group_membership_id"), # (relation, qui avait un id) ("notes_semset", "semset_id"), ("notes_tags", "tag_id"), ("itemsuivi", "itemsuivi_id"), ("itemsuivi_tags", "tag_id"), ("adresse", "adresse_id"), ("admissions", "adm_id"), ("absences", ""), ("scolar_news", "news_id"), ("scolog", ""), ("etud_annotations", "id"), ("billet_absence", "billet_id"), # ("entreprise_correspondant", "entreprise_corresp_id"), # ("entreprise_contact", "entreprise_contact_id"), ("absences_notifications", ""), # ("notes_form_modalites", "form_modalite_id"), : déjà initialisées ("notes_appreciations", "id"), ("scolar_autorisation_inscription", "autorisation_inscription_id"), ("scolar_formsemestre_validation", "formsemestre_validation_id"), ("scolar_events", "event_id"), ("notes_notes_log", "id"), ("notes_notes", ""), ("notes_moduleimpl_inscription", "moduleimpl_inscription_id"), ("notes_formsemestre_inscription", "formsemestre_inscription_id"), ("notes_formsemestre_custommenu", "custommenu_id"), ( "notes_formsemestre_ue_computation_expr", "notes_formsemestre_ue_computation_expr_id", ), ("notes_formsemestre_uecoef", "formsemestre_uecoef_id"), ("notes_semset_formsemestre", ""), # (relation) ("notes_formsemestre_etapes", ""), ("notes_formsemestre_responsables", ""), # (relation) ("notes_modules_tags", ""), ("itemsuivi_tags_assoc", ""), # (relation) ("sco_prefs", "pref_id"), ] """ from tools.import_scodoc7_dept import * import_scodoc7_dept( "RT", "SCORT" ) """