Replaced old stripogram with a new HTML parser

2021-07-11 22:02:22 +02:00 · 2021-07-11 22:02:22 +02:00 · 2f2a06048c
commit 2f2a06048c
parent 1034c096e7
2 changed files with 3 additions and 4 deletions
--- a/app/scodoc/safehtml.py
+++ b/app/scodoc/safehtml.py
@@ -34,8 +34,8 @@ from html.parser import HTMLParser
 """

 # permet de conserver les liens
-def html_to_safe_html(text, convert_br=True):
-    # text = html2safehtml(text, valid_tags=("b", "a", "i", "br", "p"))
+def html_to_safe_html(text, convert_br=True):  # was HTML2SafeHTML
+    # text = html_to_safe_html(text, valid_tags=("b", "a", "i", "br", "p"))
    # New version (jul 2021) with our own parser
    text = convert_html_to_text(text)
    if convert_br:
--- a/app/scodoc/sco_news.py
+++ b/app/scodoc/sco_news.py
@@ -35,7 +35,6 @@ from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 from email.header import Header
 from operator import itemgetter
-from stripogram import html2text
 import PyRSS2Gen  # pylint: disable=import-error

 import app.scodoc.sco_utils as scu
@@ -232,7 +231,7 @@ def scolar_news_summary_rss(context, title, sco_url, n=5):
    news = scolar_news_summary(context, n=n)
    items = []
    for n in news:
-        text = html2text(n["text"])
+        text = safehtml.convert_html_to_text(n["text"])
        items.append(
            PyRSS2Gen.RSSItem(
                title=six.text_type("%s %s" % (n["rssdate"], text), SCO_ENCODING),