import re  # used by format_for_shell (dependency introduced by this change)


class Filter:
    """Bit flags restricting which students are taken into account.

    Flags are combined with ``|`` into a single integer mask.
    """

    # Consider only technological baccalaureates (statistics are always asked).
    TECHNO = 1
    # Consider only women (gender statistics are frequently asked).
    WOMAN = 2
    # Consider only incoming students (primo-entrants) in first year of the cohort.
    PRIMO = 4
    # Consider only people having a first year, not parallel entries.
    MAIN = 8


class OptionSet:
    """Run-time options, usable both as a mapping and through attributes.

    Public names (``opts.width``, ``opts["width"]``) live in an internal
    dictionary; names starting with an underscore are ordinary instance
    attributes.  The class-level tables (``stringoptions``, ``posint_options``,
    ``booleanoptions``, ``choiceoptions``, ``shortcuts``) describe the known
    options and drive both the argument parser and :meth:`asCLI`.

    Derived values (:meth:`orderkey`, :meth:`filter`) are recomputed on every
    call so they can never be stale after an option or the department list
    changes.
    """

    def __init__(self, values=None):
        """Create an option set.

        Args:
            values: optional dict used *as is* (not copied) as the option
                storage.  Anything that is not a dict is ignored and a fresh
                empty dict is used, so instances never share state by accident.
        """
        self._options = values if isinstance(values, dict) else {}
        self._depts = []

    def __getitem__(self, key):
        """Return the value stored for ``key`` (``KeyError`` if missing)."""
        return self._options[key]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``."""
        self._options[key] = value

    def __delitem__(self, key):
        """Remove ``key``; a missing key is silently ignored."""
        self._options.pop(key, None)

    def __contains__(self, key):
        """Support ``key in options``."""
        return key in self._options

    def __getattr__(self, name):
        """Fall back to the option dictionary for unknown attributes."""
        # __getattr__ can run before __init__ (copy, pickle, __new__): looking
        # up self._options here would re-enter __getattr__ forever.
        options = self.__dict__.get("_options")
        if options is None:
            raise AttributeError(name)
        try:
            return options[name]
        except KeyError:
            raise AttributeError(
                f"'Options' object has no attribute '{name}'"
            ) from None

    def __setattr__(self, name, value):
        """Store private names on the instance, everything else as an option."""
        if name.startswith("_"):
            super().__setattr__(name, value)
        else:
            self._options[name] = value

    def __delattr__(self, name):
        """Delete an option; raise ``AttributeError`` when it does not exist."""
        if name in self._options:
            del self._options[name]
        else:
            raise AttributeError(f"'Options' object has no attribute '{name}'")

    def __repr__(self):
        return f"Options({self._options})"

    def asDict(self):
        """Return the internal option dictionary (not a copy)."""
        return self._options

    def asCLI(self, excludeDefault=True, onlyOrders=False, depts=True):
        """Serialise the options as a list of command-line arguments.

        Args:
            excludeDefault: skip options whose value equals the default.
            onlyOrders: emit only the ``--orders`` section (and departments).
            depts: append the department list at the end.

        Returns:
            A list of argument values.  Integer options are kept as ``int``;
            convert with ``str`` before handing the list to a parser.
        """
        cls = type(self)
        cli = []
        if not onlyOrders:
            for name, choices in cls.choiceoptions:
                index = self[name]
                if index == 0 and excludeDefault:
                    continue
                cli.append("--" + choices[index])
            # String and integer options share the [name, default, ...] layout.
            for name, default, *_ in cls.stringoptions + cls.posint_options:
                value = self[name]
                if excludeDefault and value == default:
                    continue
                cli.extend(["--" + name, value])
            for name, default, *_ in cls.booleanoptions:
                value = self[name]
                if excludeDefault and value == default:
                    continue
                cli.append(("--" if value else "--no-") + name)
            overrides = self._options.get("override") or {}
            for field, by_value in overrides.items():
                for field_value, replacements in by_value.items():
                    for key, val in replacements.items():
                        cli.extend(["--override", field, field_value, key, val])
        if "orders" in self._options:
            cli.append("--orders")
            for i, column in enumerate(self._options["orders"]):
                if i > 0:
                    cli.append("/")  # separator between two groups
                cli.extend(column)
            cli.append(".")  # terminator: what follows are departments
        if depts:
            cli.extend(self._depts)
        return cli

    def orderkey(self, filters=False):
        """Return the key identifying this run (departments joined by ``_``).

        With ``filters=True`` the main and secondary filter masks are appended.
        """
        parts = list(self._depts)
        if filters:
            parts.append(str(self.filter()))
            parts.append(str(self.filter(main=False)))
        return "_".join(parts)

    def depts(self, xset=None):
        """Return the department list, replacing it first if ``xset`` is non-empty."""
        if xset:
            self._depts = xset
        return self._depts

    def filter(self, main: bool = True):
        """Return the :class:`Filter` mask built from the boolean options.

        Args:
            main: ``True`` for the base population (``base_*`` options),
                ``False`` for the secondary population (``secondary_*``
                options), which always includes the base restrictions too.
        """
        stem = "base" if main else "secondary"
        mask = 0
        for suffix, flag in (("techno", Filter.TECHNO), ("women", Filter.WOMAN)):
            if self._options.get(f"{stem}_{suffix}"):
                mask |= flag
        if main:
            return mask
        return mask | self.filter()

    # [name, default, help]
    stringoptions = [
        [
            "department_separator",
            " ",
            "Separator before department in semester designation/display/origin designation",
        ],
        [
            "diplome_separator",
            "",
            "Separator before diploma in semester designation/display/origin designation",
        ],
        [
            "modalite_separator",
            " ",
            "Separator before modality in semester designation/display/origin designation",
        ],
        [
            "parcours_separator",
            "/",
            "Separator before parcours in semester designation/display/origin designation",
        ],
        [
            "rank_separator",
            "",
            "Separator before rank (~year of progress) in cursus designation/display/origin designation",
        ],
        [
            "year_separator",
            " ",
            "Separator before year in semester designation/display/origin designation",
        ],
        [
            "nick",
            "{diplome}{rank}{multidepartment}{modalite}{parcours}{year}",
            "Yearly cursus designation (should be unique for each distinguisable cursus choice)",
        ],
        [
            "displayname",
            "{diplome}{rank}{multidepartment}{modaliteshort}{parcours}",
            "Yearly cursus origin (used only for captionning the flow)",
        ],
        [
            "extnick",
            "{ext}{rank}{multidepartment}{diplomenobut}{modaliteshort}",
            "Origin designation (should be unique for each distinguisable origin of students)",
        ],
    ]

    # [name, [flag for value 0, flag for value 1, ...]]
    choiceoptions = [["algo", ["optimize", "reuse", "restart"]]]

    # [name, default, help]
    booleanoptions = [
        ["base_techno", False, "Base population includes only techno students"],
        ["base_women", False, "Base population includes only women students"],
        [
            "secondary_techno",
            True,
            "Secondary (focused) population includes only techno students",
        ],
        [
            "secondary_women",
            False,
            "Secondary (focused) population includes only women students",
        ],
    ]

    # [name, default, minimum, maximum, help]; a bound of None means "no bound"
    posint_options = [
        ["spacing", 14, 0, 30, "Spacing between groups in the same column"],
        ["thickness", 6, 0, 30, "Width of the group bars in columns"],
        ["hmargin", 20, 0, 50, "Global margin around the graph"],
        ["fontsize_name", 10, 0, 30, "Font size of the group name"],
        ["fontsize_count", 14, 0, 30, "Font size of the population marks"],
        ["width", 1300, 800, None, "Width of the graphics (not counting captions)"],
        ["statwidth", 300, 0, None, "Width of the side caption"],
        ["height", 0, 0, None, "Height of the graphics (0 = automaticd)"],
        ["loops", 300, 0, 1000, "Number of loops of the optimization algorithm"],
        ["baseyear", 2021, 2000, None, "Base year (start of the cohort)"],
    ]

    # Extra flags accepted for an option, keyed by option name
    shortcuts = {"baseyear": ["--base", "-b"], "loops": ["-l"]}


def range_limited_int_type(arg, MIN_VAL, MAX_VAL):
    """Type function for argparse: an integer within inclusive bounds.

    Args:
        arg: the raw argument value.
        MIN_VAL: smallest accepted value, or ``None`` for no lower bound.
        MAX_VAL: largest accepted value, or ``None`` for no upper bound.

    Raises:
        argparse.ArgumentTypeError: ``arg`` is not an integer or is out of range.
    """
    try:
        value = int(arg)
    except (TypeError, ValueError) as err:
        raise argparse.ArgumentTypeError("Must be an integer") from err
    if MIN_VAL is not None and value < MIN_VAL:
        raise argparse.ArgumentTypeError(
            "Argument must be larger or equal to " + str(MIN_VAL)
        )
    if MAX_VAL is not None and value > MAX_VAL:
        raise argparse.ArgumentTypeError(
            "Argument must be smaller or equal to " + str(MAX_VAL)
        )
    return value


# Anything outside this set needs quoting before being pasted into a shell.
_SHELL_UNSAFE = re.compile(r"[^+/.a-zA-Z0-9_-]")


def format_for_shell(strings):
    """Join ``strings`` into one line that can be copy-pasted into a shell.

    Every item is converted with ``str``.  Items containing a character
    outside ``[+/.a-zA-Z0-9_-]`` are wrapped in single quotes (embedded single
    quotes become ``'"'"'``); empty items become ``''``.
    """
    formatted = []
    for item in strings:
        text = str(item)
        if _SHELL_UNSAFE.search(text):
            formatted.append("'" + text.replace("'", "'\"'\"'") + "'")
        elif not text:
            formatted.append("''")
        else:
            formatted.append(text)
    return " ".join(formatted)


def _option_flags(key):
    """Return ``--key`` followed by the shortcuts registered for ``key``."""
    return ["--" + key, *OptionSet.shortcuts.get(key, [])]


def _bounded_int(low, high):
    """Return an argparse ``type`` callable accepting integers in [low, high]."""

    def parse(text):
        return range_limited_int_type(text, low, high)

    return parse


def _range_hint(low, high):
    """Return the help-text suffix describing the inclusive range [low, high]."""
    if low is not None and high is not None:
        return f" (between {low} and {high})"
    if low is not None:
        return f" larger or equal to {low}"
    if high is not None:
        return f" smaller or equal to {high}"
    return ""


def _build_parser():
    """Build the argument parser from the option tables of OptionSet."""
    parser = argparse.ArgumentParser(
        usage="""
    %(prog)s [--options] DEPARTEMENTS...

    OR

    %(prog)s FILE.json
    """,
        description="Create a sankey diagram for the evolution of students through some departments.",
    )
    parser.add_argument(
        "--orders",
        nargs="+",
        help="Start of orders list with subgroups separated by / ended by .",
    )
    parser.add_argument("depts", nargs="*", help="List of departments")

    # String options
    for name, default, help_text in OptionSet.stringoptions:
        parser.add_argument(
            *_option_flags(name), type=str, default=default, help=help_text
        )

    # Bounded integer options
    for name, default, low, high, help_text in OptionSet.posint_options:
        parser.add_argument(
            *_option_flags(name),
            type=_bounded_int(low, high),
            default=default,
            help=help_text + _range_hint(low, high),
        )

    # Boolean options: --name / --no-name, mutually exclusive
    for name, default, help_text in OptionSet.booleanoptions:
        group = parser.add_mutually_exclusive_group()
        group.add_argument(
            *_option_flags(name), action="store_true", default=default, help=help_text
        )
        group.add_argument(*_option_flags("no-" + name), action="store_true")

    # Other options
    parser.add_argument(
        "--override",
        nargs=4,
        action="append",  # the flag may be repeated, one replacement each
        metavar=(
            "FIELD",
            "FIELD_VALUE",
            "REPLACEMENT_FIELD",
            "REPLACEMENT_FIELD_VALUE",
        ),
        help="Override a specific field with a fixed value in some specific semester(s) selected by FIELD=FIELD_VALUE",
    )

    optimize_group = parser.add_mutually_exclusive_group()
    optimize_group.add_argument("--reuse", "-r", action="store_true", help="Reuse mode")
    optimize_group.add_argument(
        "--optimize",
        "-o",
        action="store_true",
        help="Use algorithm to enhance graph (using last result)",
    )
    optimize_group.add_argument(
        "--restart",
        "-R",
        action="store_true",
        help="Use algorithm to enhance graph (starting from random)",
    )
    return parser


def _load_json_cli(json_file):
    """Read a JSON list of arguments from ``json_file`` and return it as strings."""
    try:
        with open(json_file, "r") as f:
            source = json.load(f)
    except FileNotFoundError:
        die(f"Error: File '{json_file}' not found.", 1)
    except json.JSONDecodeError:
        die(f"Error: File '{json_file}' is not valid JSON.", 1)
    else:
        if isinstance(source, list):
            return [str(x) for x in source]
        die(f"Error: File '{json_file}' must contain a JSON list of arguments.", 1)
    # die() is expected to terminate; never continue without arguments.
    sys.exit(1)


def cli_check():
    """Parse the command line and return the argparse namespace.

    When the first argument ends with ``.json`` the arguments are read from
    that file (a JSON list, as written from ``OptionSet.asCLI()``) instead of
    the command line.  Prints the help and exits with status 0 when no
    department was given, either positionally or after the ``.`` that ends
    ``--orders``.
    """
    parser = _build_parser()
    if len(sys.argv) > 1 and sys.argv[1].endswith(".json"):
        args = parser.parse_args(args=_load_json_cli(sys.argv[1]))
    else:
        args = parser.parse_args()
    orders = args.orders
    if not args.depts and (
        orders is None or orders[-1] == "." or "." not in orders
    ):
        parser.print_help()
        sys.exit(0)
    return args


def options_from_args(args):
    """Build an OptionSet from the namespace returned by cli_check().

    ``--orders`` values are split into groups on ``/``; ``.`` ends the list
    and everything after it is treated as departments.  If the terminating
    ``.`` is missing, the last group is still kept.
    """
    options = OptionSet()
    # Copy: the namespace must not be modified behind the caller's back.
    depts = list(args.depts or [])
    if args.orders:
        tokens = list(args.orders)
        orders = []
        group = []
        terminated = False
        consumed = 0
        for consumed, token in enumerate(tokens, start=1):
            if token == ".":
                # End of the orders list
                orders.append(group)
                terminated = True
                break
            if token == "/":
                # Start a new group
                orders.append(group)
                group = []
            else:
                group.append(token)
        if not terminated and group:
            orders.append(group)
        depts.extend(tokens[consumed:])
        options["orders"] = orders

    raw_overrides = getattr(args, "override", None)
    if raw_overrides:
        # Tolerate a single flat [FIELD, VALUE, KEY, REPLACEMENT] quadruple.
        if isinstance(raw_overrides[0], str):
            raw_overrides = [raw_overrides]
        overrides = {}
        for field, field_value, key, val in raw_overrides:
            overrides.setdefault(field, {}).setdefault(field_value, {})[key] = val
        options["override"] = overrides

    dargs = vars(args)
    for name, *_ in OptionSet.posint_options + OptionSet.stringoptions:
        if name in dargs:
            options[name] = dargs[name]
    for name, *_ in OptionSet.booleanoptions:
        if name in dargs:
            # An explicit --no-NAME always wins over the default of --NAME.
            options[name] = False if dargs.get("no_" + name) else dargs[name]

    # Index into OptionSet.choiceoptions["algo"]; optimize is the default.
    if args.optimize:
        options.algo = 0
    elif args.reuse:
        options.algo = 1
    elif args.restart:
        options.algo = 2
    else:
        options.algo = 0
    options.depts(depts)
    return options


def merge_options(Options, jsondict):
    """Copy the ``override`` and ``orders`` entries of ``jsondict`` into ``Options``."""
    for key in ("override", "orders"):
        if key in jsondict:
            Options[key] = jsondict[key]
- "height": 0, - "hmargin": 20, - "parcours_separator": "/", - "year_separator": " ", - "rank_separator": "", - "diplome_separator": "", - "reuse": "yes", - "optimize": "yes", - "main_filter": 0, - "secondary_filter": 1, -} - - -def conf_value(xkey: str): - """Manage default values""" - if xkey in conf: - return conf[xkey] - if xkey in defaults: - return defaults[xkey] - if xkey[-9:] == "separator": - return " " - if xkey == "nick": - return "{diplome}{rank}{multidepartment}{modalite}{parcours}{year}" - if xkey == "displayname": - return "{diplome}{rank}{multidepartment}{modaliteshort}{parcours}" - if xkey == "extnick": - return "{ext}{rank}{multidepartment}{diplomenobut}{modaliteshort}" - if xkey == "orders": - return [[], [], [], [], []] - return {} +defaults = {} student = {} @@ -393,7 +722,9 @@ def get_jury_from_formsem(dept: str, semid): def get_override(sem, xkey, default=None): - overrides = conf_value("override") + if "overrides" not in Options: + return default + overrides = Options.override for j in ["titre_num", "titre", "session_id"]: if ( j in sem @@ -406,46 +737,44 @@ def get_override(sem, xkey, default=None): def nick_replace( - department, diplome, rank, modalite, parcours, nick, year=Options.base_year + department, diplome, rank, modalite, parcours, nick, year=Options.baseyear ): if type(rank) != int: rank = 0 if len(department) > 0: - nick = nick.replace( - "{department}", conf_value("department_separator") + department - ) + nick = nick.replace("{department}", Options.department_separator + department) else: nick = nick.replace("{department}", "") if len(department) > 0 and len(depts) > 1: nick = nick.replace( - "{multidepartment}", conf_value("department_separator") + department + "{multidepartment}", Options.department_separator + department ) else: nick = nick.replace("{multidepartment}", "") if len(diplome) > 0: - nick = nick.replace("{diplome}", conf_value("diplome_separator") + diplome) + nick = nick.replace("{diplome}", 
Options.diplome_separator + diplome) else: nick = nick.replace("{diplome}", "") if len(diplome) > 0 and diplome != "BUT": - nick = nick.replace("{diplomenobut}", conf_value("diplome_separator") + diplome) + nick = nick.replace("{diplomenobut}", Options.diplome_separator + diplome) else: nick = nick.replace("{diplomenobut}", "") if rank > 0: - nick = nick.replace("{rank}", conf_value("rank_separator") + str(rank)) + nick = nick.replace("{rank}", Options.rank_separator + str(rank)) else: nick = nick.replace("{rank}", "") nick = nick.replace( - "{year}", conf_value("year_separator") + str(Options.base_year + rank - 1) + "{year}", Options.year_separator + str(Options.baseyear + rank - 1) ) if diplome != "BUT": nick = nick.replace( "{yearnobut}", - conf_value("year_separator") + str(Options.base_year + rank - 1), + Options.year_separator + str(Options.baseyear + rank - 1), ) else: nick = nick.replace("{yearnobut}", "") if len(modalite) > 0: - nick = nick.replace("{modalite}", conf_value("modalite_separator") + modalite) + nick = nick.replace("{modalite}", Options.modalite_separator + modalite) else: nick = nick.replace("{modalite}", "") if len(modalite) > 0 and modalite != "FI": @@ -453,7 +782,7 @@ def nick_replace( else: nick = nick.replace("{modaliteshort}", "") if len(parcours) > 0: - nick = nick.replace("{parcours}", conf_value("parcours_separator") + parcours) + nick = nick.replace("{parcours}", Options.parcours_separator + parcours) else: nick = nick.replace("{parcours}", "") extname = "Ecand " @@ -499,9 +828,9 @@ def analyse_student(semobj, etud, univ_year=None): gg = g.split("=") if gg[0] in groups: modalite = gg[1] - nick = conf_value("nick") + nick = Options.nick nick = nick_replace(department, diplome, rank, modalite, parcours, nick, year) - displayname = conf_value("displayname") + displayname = Options.displayname displayname = nick_replace( department, diplome, rank, modalite, parcours, displayname, year ) @@ -538,7 +867,7 @@ def analyse_depts(): year = 1 
else: year = (sem["semestre_id"] + 1) // 2 - offset = sem["annee_scolaire"] - Options.base_year - year + 1 + offset = sem["annee_scolaire"] - Options.baseyear - year + 1 if offset < 0 and offset > -4: oldsems.add(str(semid)) oldsemsdept[semid] = dept @@ -992,7 +1321,7 @@ for etudid in student.keys(): elif resultyear in ("NAR", "DEM", "DEF", "ABAN"): finaloutput = "FAIL" failure[ddd] += 1 - elif resjury["annee"]["annee_scolaire"] != Options.base_year + lastyear - 1: + elif resjury["annee"]["annee_scolaire"] != Options.baseyear + lastyear - 1: finaloutput = "RED" checkred = True if checkred: @@ -1019,7 +1348,7 @@ for etudid in student.keys(): yearold = cache["sem"][etud["oldsem"]]["annee_scolaire"] etud["nickshort"][firstyear - 1] = etud["old"] # yy = yearold - # delta = firstyear + Options.base_year - yy - 2 + # delta = firstyear + Options.baseyear - yy - 2 # for i in range(delta, firstyear - 1): # etud["nickshort"][i] = etud["nickshort"][firstyear - 1] + "*" * ( # firstyear - 1 - i @@ -1037,9 +1366,9 @@ for etudid in student.keys(): rank = etud["rank"][startsem] modalite = etud["modalite"][startsem] parcours = etud["parcours"][startsem] - nick = "EXT" + conf_value("nick") + nick = "EXT" + Options.nick nick = nick_replace(department, diplome, rank, modalite, parcours, nick) - displayname = conf_value("extnick") + displayname = Options.extnick displayname = nick_replace( department, diplome, rank, modalite, parcours, displayname ) @@ -1052,18 +1381,6 @@ for etudid in student.keys(): entries[ddd] += 1 -class Filter: - # Filter on students to be considered - # 1 consider only technological baccalaureates, statistics are always asked - # 2 consider only women, because gender statistics are frequently asked - # 4 consider only incoming students (primo-entrants) in first year of the cohort - # 8 consider only people having a first year, not parallel entries - TECHNO = 1 - WOMAN = 2 - PRIMO = 4 - MAIN = 8 - - def bags_from_students(student, filter=0): bags = [] for etudid 
in student.keys(): @@ -1244,7 +1561,7 @@ def ordernodes(layers, orders, edges): for j, n in enumerate(layernodes): node_position[n] = j node_layer[n] = layer - debug(crossweight(node_position, node_layer, edges)) + debug("Solution has weight" + str(crossweight(node_position, node_layer, edges))) return node_position, node_layer, newls @@ -1361,10 +1678,10 @@ def nodestructure_from_bags(bags, sbags=None): def compute_svg(height, padding, realdensity, node_structure): unit_ratio = 96 / 72 - thickness = conf_value("thickness") - fontsize_name = conf_value("fontsize_name") - fontsize_count = conf_value("fontsize_count") - width = conf_value("width") + thickness = Options.thickness + fontsize_name = Options.fontsize_name + fontsize_count = Options.fontsize_count + width = Options.width columns = [] l = 0 for i in range(5): @@ -1525,30 +1842,32 @@ def compute_svg(height, padding, realdensity, node_structure): def printsvg(): padding = 4 - spacing = conf_value("spacing") - height = conf_value("height") - hmargin = conf_value("hmargin") - bags = bags_from_students(student, conf_value("main_filter")) - sbags = bags_from_students(student, conf_value("secondary_filter")) + spacing = Options.spacing + height = Options.height + hmargin = Options.hmargin + bags = bags_from_students(student, Options.filter()) + sbags = bags_from_students(student, Options.filter(main=False)) node_structure, layers, edges = nodestructure_from_bags(bags, sbags) filename = "best-" + orderkey - if Options.restart: + if Options.algo == 2: try: os.remove(filename) except OSError: pass - if Options.optimize >= 0: + if Options.algo < 2: lastorders = read_conf(filename) else: lastorders = {} node_position, node_layer, newls = ordernodes(layers, lastorders, edges) - if Options.optimize != 0: - orders = genetic_optimize( - node_position, node_layer, edges, loops=abs(Options.optimize) - ) + if Options.algo != 1: + orders = genetic_optimize(node_position, node_layer, edges, loops=Options.loops) else: orders = 
newls write_conf("best-" + orderkey, orders) + info(format_for_shell(OptionSet({"orders": orders}).asCLI(onlyOrders=True))) + Options.orders = orders + write_conf(Options.orderkey(filters=True), Options.asCLI()) + info(format_for_shell(OptionSet({"orders": orders}).asCLI(onlyOrders=True))) node_position, node_layer, newls = ordernodes(layers, orders, edges) realdensity, height, layer_structure = get_layer_structure( newls, node_structure, spacing, hmargin, height