2021-04-02 14:59:08 +02:00
|
|
|
|
from officiel import supprime_accent_espace
|
2021-04-05 12:05:04 +02:00
|
|
|
|
import unicodedata
|
2021-04-02 14:59:08 +02:00
|
|
|
|
|
|
|
|
|
def get_indice(champ, entetes):
    """Return the index of the first header that is contained in ``champ``.

    Args:
        champ: Object searched with the ``in`` operator (typically a string,
            in which case the test is a substring test).
        entetes: Iterable of candidate headers, examined in order.

    Returns:
        The zero-based position of the first element of ``entetes`` for which
        ``entete in champ`` is true, or ``None`` when nothing matches.
    """
    positions = (
        rang for rang, libelle in enumerate(entetes) if libelle in champ
    )
    # next() with a default yields the first hit, or None on exhaustion.
    return next(positions, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_indice_sans_accent_ni_espace(champ, entetes):
    """Return the index of the first header found in ``champ`` after clean-up.

    ``champ`` and every header are passed through ``supprime_accent_espace``
    and then right-stripped before being compared with the ``in`` operator.

    NOTE(review): per its name and the original docstring, the helper is
    expected to drop accents, spaces, upper case and special characters;
    its implementation is not visible here -- confirm against ``officiel``.

    Args:
        champ: Raw field text to search in.
        entetes: Iterable of raw headers, examined in order.

    Returns:
        The zero-based position of the first header whose cleaned form is
        contained in the cleaned ``champ``, or ``None`` when nothing matches.
    """
    # The field is cleaned once; headers are cleaned lazily, one at a time.
    cible = supprime_accent_espace(champ).rstrip()
    correspondances = (
        rang
        for rang, libelle in enumerate(entetes)
        if supprime_accent_espace(libelle).rstrip() in cible
    )
    return next(correspondances, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def caracteres_recalcitrants(contenu):
    """Replace a few troublesome character sequences in ``contenu``.

    The sequences are written with explicit escapes because the decomposed
    and precomposed forms of an accented letter look identical in an editor,
    which makes literal characters easy to corrupt into a no-op replacement.

    Only the sequences listed below are handled; no general Unicode
    normalization is applied.

    Args:
        contenu: Text to clean (``str``).

    Returns:
        A new string in which:
        * ``e`` + combining grave accent (U+0300) becomes U+00E8,
        * ``e`` + combining acute accent (U+0301) becomes U+00E9,
        * ``a`` + combining circumflex (U+0302) becomes U+00E2,
        * the right single quotation mark (U+2019) becomes ``'``,
        * the no-break space (U+00A0) becomes a regular space.
    """
    remplacements = (
        ("e\u0300", "\u00e8"),  # decomposed e-grave -> precomposed
        ("e\u0301", "\u00e9"),  # decomposed e-acute -> precomposed
        ("a\u0302", "\u00e2"),  # decomposed a-circumflex -> precomposed
        ("\u2019", "'"),  # typographic apostrophe -> ASCII apostrophe
        ("\u00a0", " "),  # no-break space -> regular space
    )
    # The targets are disjoint, so the order of the replacements is irrelevant.
    for ancien, nouveau in remplacements:
        contenu = contenu.replace(ancien, nouveau)
    return contenu
|