From 2f2a06048cac62a1516bca3c1db9f4efc9353248 Mon Sep 17 00:00:00 2001 From: Emmanuel Viennet Date: Sun, 11 Jul 2021 22:02:22 +0200 Subject: [PATCH] replaced old stripogram by a new HTML parser --- app/scodoc/safehtml.py | 4 ++-- app/scodoc/sco_news.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/app/scodoc/safehtml.py b/app/scodoc/safehtml.py index 68e7f44b..f9deecfe 100644 --- a/app/scodoc/safehtml.py +++ b/app/scodoc/safehtml.py @@ -34,8 +34,8 @@ from html.parser import HTMLParser """ # permet de conserver les liens -def html_to_safe_html(text, convert_br=True): - # text = html2safehtml(text, valid_tags=("b", "a", "i", "br", "p")) +def html_to_safe_html(text, convert_br=True): # was HTML2SafeHTML + # text = html_to_safe_html(text, valid_tags=("b", "a", "i", "br", "p")) # New version (jul 2021) with our own parser text = convert_html_to_text(text) if convert_br: diff --git a/app/scodoc/sco_news.py b/app/scodoc/sco_news.py index cd462dd3..9baf2a80 100644 --- a/app/scodoc/sco_news.py +++ b/app/scodoc/sco_news.py @@ -35,7 +35,6 @@ from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.header import Header from operator import itemgetter -from stripogram import html2text import PyRSS2Gen # pylint: disable=import-error import app.scodoc.sco_utils as scu @@ -232,7 +231,7 @@ def scolar_news_summary_rss(context, title, sco_url, n=5): news = scolar_news_summary(context, n=n) items = [] for n in news: - text = html2text(n["text"]) + text = safehtml.convert_html_to_text(n["text"]) items.append( PyRSS2Gen.RSSItem( title=six.text_type("%s %s" % (n["rssdate"], text), SCO_ENCODING),