1
0
forked from ScoDoc/ScoDoc

Replaced old stripogram with a new HTML parser

This commit is contained in:
Emmanuel Viennet 2021-07-11 22:02:22 +02:00
parent 1034c096e7
commit 2f2a06048c
2 changed files with 3 additions and 4 deletions

View File

@ -34,8 +34,8 @@ from html.parser import HTMLParser
"""
# permet de conserver les liens
def html_to_safe_html(text, convert_br=True):
# text = html2safehtml(text, valid_tags=("b", "a", "i", "br", "p"))
def html_to_safe_html(text, convert_br=True): # was HTML2SafeHTML
# text = html_to_safe_html(text, valid_tags=("b", "a", "i", "br", "p"))
# New version (jul 2021) with our own parser
text = convert_html_to_text(text)
if convert_br:

View File

@ -35,7 +35,6 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.header import Header
from operator import itemgetter
from stripogram import html2text
import PyRSS2Gen # pylint: disable=import-error
import app.scodoc.sco_utils as scu
@ -232,7 +231,7 @@ def scolar_news_summary_rss(context, title, sco_url, n=5):
news = scolar_news_summary(context, n=n)
items = []
for n in news:
text = html2text(n["text"])
text = safehtml.convert_html_to_text(n["text"])
items.append(
PyRSS2Gen.RSSItem(
title=six.text_type("%s %s" % (n["rssdate"], text), SCO_ENCODING),