1
0
forked from ScoDoc/ScoDoc

Replaced old stripogram with a new HTML parser

This commit is contained in:
Emmanuel Viennet 2021-07-11 22:02:22 +02:00
parent 1034c096e7
commit 2f2a06048c
2 changed files with 3 additions and 4 deletions

View File

@ -34,8 +34,8 @@ from html.parser import HTMLParser
""" """
# permet de conserver les liens # permet de conserver les liens
def html_to_safe_html(text, convert_br=True): def html_to_safe_html(text, convert_br=True): # was HTML2SafeHTML
# text = html2safehtml(text, valid_tags=("b", "a", "i", "br", "p")) # text = html_to_safe_html(text, valid_tags=("b", "a", "i", "br", "p"))
# New version (jul 2021) with our own parser # New version (jul 2021) with our own parser
text = convert_html_to_text(text) text = convert_html_to_text(text)
if convert_br: if convert_br:

View File

@ -35,7 +35,6 @@ from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.header import Header from email.header import Header
from operator import itemgetter from operator import itemgetter
from stripogram import html2text
import PyRSS2Gen # pylint: disable=import-error import PyRSS2Gen # pylint: disable=import-error
import app.scodoc.sco_utils as scu import app.scodoc.sco_utils as scu
@ -232,7 +231,7 @@ def scolar_news_summary_rss(context, title, sco_url, n=5):
news = scolar_news_summary(context, n=n) news = scolar_news_summary(context, n=n)
items = [] items = []
for n in news: for n in news:
text = html2text(n["text"]) text = safehtml.convert_html_to_text(n["text"])
items.append( items.append(
PyRSS2Gen.RSSItem( PyRSS2Gen.RSSItem(
title=six.text_type("%s %s" % (n["rssdate"], text), SCO_ENCODING), title=six.text_type("%s %s" % (n["rssdate"], text), SCO_ENCODING),