diff --git a/tools/anonymize_db.py b/tools/anonymize_db.py index 2e5040a25..1d95a11b0 100755 --- a/tools/anonymize_db.py +++ b/tools/anonymize_db.py @@ -33,7 +33,6 @@ Runned as user "scodoc" with scodoc and postgresql up. E. Viennet, Jan 2019 """ -import os import psycopg2 import sys import traceback @@ -45,6 +44,13 @@ def log(msg): sys.stderr.flush() +def usage(): + sys.stdout.flush() + sys.stderr.flush() + print(f"Usage: {sys.argv[0]} [--users] dbname", file=sys.stderr) + sys.exit(1) + + # --- Fonctions d'Anonymisation, en SQL anonymize_name = "random_text_md5(8)" @@ -52,15 +58,11 @@ anonymize_date = "'1970-01-01'" anonymize_question_str = "'?'" anonymize_null = "NULL" -# aggregate_length = lambda column, _: 'length({})'.format(column) - - # --- Champs à anonymiser (cette configuration pourrait être placé dans # un fichier séparé et le code serait alors générique pour toute base # posgresql. # # On essaie de retirer les données personnelles des étudiants et des entreprises -# TODO: L'identité (login) des enseignants n'est pas modifiée # # ANONYMIZED_FIELDS = { @@ -102,17 +104,23 @@ def anonymize_column(cursor, tablecolumn): remplacé par nom_valeur_de_la_clé. """ table, column = tablecolumn.split(".") - anonymization = ANONYMIZED_FIELDS[tablecolumn] - log("processing {}".format(tablecolumn)) - cursor.execute( - "UPDATE {table} SET {column} = {value};".format( - table=table, - column=column, - value=anonymization(column, key_name) - if callable(anonymization) - else anonymization, - ) - ) + anonymized = ANONYMIZED_FIELDS[tablecolumn] + log(f"processing {tablecolumn}") + cursor.execute(f"UPDATE {table} SET {column} = {anonymized};") + + +def anonymize_users(cursor): + """Anonymise la table utilisateurs""" + log("processing user table") + cursor.execute("""UPDATE "user" SET email = 'x@y.fr';""") + cursor.execute("""UPDATE "user" SET password_hash = '*';""") + cursor.execute("""UPDATE "user" SET password_scodoc7 = NULL;""") + cursor.execute("""UPDATE "user" SET date_created = '2001-01-01';""") + cursor.execute("""UPDATE "user" SET date_expiration = '2201-12-31';""") + cursor.execute("""UPDATE "user" SET token = NULL;""") + cursor.execute("""UPDATE "user" SET token_expiration = NULL;""") + cursor.execute("""UPDATE "user" SET nom=CONCAT('nom_', id);""") + cursor.execute("""UPDATE "user" SET prenom=CONCAT('nom_', id);""") def anonymize_db(cursor): @@ -121,21 +129,32 @@ def anonymize_db(cursor): anonymize_column(cursor, tablecolumn) -dbname = sys.argv[1] +process_users = False +if len(sys.argv) < 2 or len(sys.argv) > 3: + usage() +if len(sys.argv) > 2: + if sys.argv[1] != "--users": + usage() + dbname = sys.argv[2] + process_users = True +else: + dbname = sys.argv[1] -log("\nAnonymizing database %s" % dbname) +log(f"\nAnonymizing database {dbname}") cnx_string = "dbname=" + dbname try: cnx = psycopg2.connect(cnx_string) -except: - log("\n*** Error: can't connect to database %s ***\n" % dbname) - log('connexion string was "%s"' % cnx_string) +except Exception as e: + log(f"\n*** Error: can't connect to database {dbname} ***\n") + log(f"""connexion string was "{cnx_string}" """) traceback.print_exc() cnx.set_session(autocommit=False) cursor = cnx.cursor() anonymize_db(cursor) +if process_users: + anonymize_users(cursor) cnx.commit() cnx.close()