# -*- mode: python -*-
# -*- coding: utf-8 -*-

##############################################################################
#
# Gestion scolarite IUT
#
# Copyright (c) 1999 - 2021 Emmanuel Viennet. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################

"""HTML sanitizing functions, used to clean user submitted HTML (Python 3 only).

This module (app/scodoc/safehtml.py) replaces the former ``stripogram`` based
implementation.  It was introduced by commit
1034c096e7d231636b078b9dc2a41dbdae7e51a2 ("replaced old stripogram by a new
HTML parser", 2021-07-11), which also renamed the call sites in
app/scodoc/sco_debouche.py and app/scodoc/sco_etud.py from
``safehtml.HTML2SafeHTML`` to ``safehtml.html_to_safe_html``.

Note: the former stripogram call kept the tags ``b, a, i, br, p``; the default
whitelist used here is ``i, b, em, br, p``, so links are NOT kept by default.
"""

from html import escape
from html.parser import HTMLParser
from typing import Iterable, Optional

# Elements that never take an end tag; they are emitted self-closed ("<br/>")
# and are never pushed on the stack of open elements.
_VOID_TAGS = frozenset(
    (
        "area",
        "base",
        "br",
        "col",
        "embed",
        "hr",
        "img",
        "input",
        "link",
        "meta",
        "source",
        "track",
        "wbr",
    )
)


def html_to_safe_html(text: Optional[str], convert_br: bool = True) -> str:
    """Return a sanitized version of the user submitted HTML *text*.

    Only the tags whitelisted by :class:`HTMLSanitizer` survive; everything
    else is dropped and plain text is HTML-escaped.

    Args:
        text: untrusted HTML fragment (``None`` or empty gives ``""``).
        convert_br: when true, newlines left in the sanitized text are turned
            into ``<br/>`` (see :func:`newline_to_br`).

    Returns:
        The sanitized markup.
    """
    safe = convert_html_to_text(text)
    return newline_to_br(safe) if convert_br else safe


def convert_html_to_text(s: Optional[str]) -> str:
    """Run *s* through :class:`HTMLSanitizer` and return the cleaned markup.

    ``None`` and the empty string both give ``""``.
    """
    if not s:
        return ""
    parser = HTMLSanitizer()
    parser.feed(s)
    # close() flushes what feed() keeps buffered (e.g. a trailing "AT&T" is
    # held back because it could be the beginning of a character reference).
    parser.close()
    return parser.text


def newline_to_br(text: str) -> str:
    """Replace each newline of *text* by an HTML line break (``<br/>``)."""
    return text.replace("\n", "<br/>")


class HTMLSanitizer(HTMLParser):
    """Whitelist based HTML sanitizer.

    Feed it untrusted HTML with :meth:`feed` (then :meth:`close`) and read the
    result from :attr:`text`:

    * tags not listed in *allowed_tags* are dropped, their text content is kept;
    * attributes not listed in *allowed_attrs* are dropped (by default ALL
      attributes are dropped, so ``onclick``, ``style``... never get through);
    * kept attribute values and all text are HTML-escaped;
    * comments, declarations and processing instructions are dropped;
    * the output is always balanced: stray end tags are ignored and elements
      still open are closed.

    NOTE(review): attribute values are escaped but URL schemes are not
    checked; do not whitelist ``href``/``src`` without adding such a check.

    Args:
        allowed_tags: names of the tags to keep.
        allowed_attrs: names of the attributes to keep on allowed tags.
        **kwargs: passed to :class:`html.parser.HTMLParser`.
    """

    def __init__(
        self,
        allowed_tags: Iterable[str] = ("i", "b", "em", "br", "p"),
        allowed_attrs: Iterable[str] = (),
        **kwargs
    ) -> None:
        super().__init__(**kwargs)
        # HTMLParser reports tag and attribute names in lower case.
        self.allowed_tags = {tag.lower() for tag in allowed_tags}
        self.allowed_attrs = {name.lower() for name in allowed_attrs}

    def reset(self) -> None:
        """Reset the parser and forget any output produced so far.

        Called by ``HTMLParser.__init__``, so the buffers always exist.
        """
        super().reset()
        self._parts = []  # pieces of sanitized output, joined on demand
        self._open_tags = []  # stack of allowed elements not yet closed

    @property
    def text(self) -> str:
        """Sanitized markup produced so far (read-only), always balanced."""
        closing = ["</{}>".format(tag) for tag in reversed(self._open_tags)]
        return "".join(self._parts + closing)

    def handle_starttag(self, tag, attrs) -> None:
        """Emit *tag* if allowed, keeping only whitelisted attributes."""
        if tag not in self.allowed_tags:
            return
        rendered = [tag]
        for name, value in attrs:
            if name not in self.allowed_attrs:
                continue
            if value is None:  # attribute without value, e.g. <p hidden>
                rendered.append(name)
            else:
                rendered.append('{}="{}"'.format(name, escape(value, quote=True)))
        if tag in _VOID_TAGS:
            self._parts.append("<{}/>".format(" ".join(rendered)))
        else:
            self._parts.append("<{}>".format(" ".join(rendered)))
            self._open_tags.append(tag)

    def handle_endtag(self, tag) -> None:
        """Close *tag* (and any element opened inside it) if it is open."""
        if tag not in self._open_tags:
            # Disallowed tag, void element ("</br>") or stray end tag.
            return
        while self._open_tags:
            closed = self._open_tags.pop()
            self._parts.append("</{}>".format(closed))
            if closed == tag:
                break

    def handle_data(self, data) -> None:
        """Append text, escaped.

        The parser hands over text with character references already decoded
        ("&lt;" arrives as "<"), so it must be escaped again before output.
        """
        self._parts.append(escape(data, quote=False))

    def handle_entityref(self, name) -> None:
        """Keep named references (only called when convert_charrefs=False)."""
        self._parts.append("&{};".format(name))

    def handle_charref(self, name) -> None:
        """Keep numeric references (only called when convert_charrefs=False)."""
        self._parts.append("&#{};".format(name))


if __name__ == "__main__":
    # Small manual check: the link is dropped, bold and italic are kept.
    test_parser = HTMLSanitizer()
    test_parser.feed(
        '<p>Hello <a href="https://example.org">world</a> '
        "<b>gras</b> <i>italique</i></p>"
    )
    test_parser.close()
    print(test_parser.text)