DocScoDoc/app/scodoc/gen_tables.py

782 lines
27 KiB
Python

# -*- mode: python -*-
# -*- coding: utf-8 -*-
##############################################################################
#
# Gestion scolarite IUT
#
# Copyright (c) 1999 - 2022 Emmanuel Viennet. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################
"""Génération de tables aux formats XHTML, PDF, Excel, XML et JSON.
Les données sont fournies comme une liste de dictionnaires, chaque élément de
cette liste décrivant une ligne du tableau.
Chaque colonne est identifiée par une clé du dictionnaire.
Voir exemple en fin de ce fichier.
Les clés commençant par '_' sont réservées. Certaines altèrent le traitement, notamment
pour spécifier les styles de mise en forme.
Par exemple, la clé '_css_row_class' spécifie le style CSS de la ligne.
"""
from __future__ import print_function
import random
from collections import OrderedDict
from xml.etree import ElementTree
import json
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Frame, PageBreak
from reportlab.platypus import Table, TableStyle, Image, KeepInFrame
from reportlab.lib.colors import Color
from reportlab.lib import styles
from reportlab.lib.units import inch, cm, mm
from reportlab.rl_config import defaultPageSize # pylint: disable=no-name-in-module
from app.scodoc import html_sco_header
from app.scodoc import sco_utils as scu
from app.scodoc import sco_excel
from app.scodoc import sco_pdf
from app.scodoc import sco_xml
from app.scodoc.sco_exceptions import ScoPDFFormatError
from app.scodoc.sco_pdf import SU
from app import log
def mark_paras(L, tags) -> list[str]:
    """Wrap every string element of L in the given markup tags.

    Tags are applied in order, so the first tag ends up innermost.
    A tag may carry attributes (e.g. 'font color="red"'): only its first
    word is used to build the closing tag.
    Elements that are not strings are passed through unchanged.
    Returns a new list (or L itself when no tag is supplied).
    """
    marked = L
    for tag in tags:
        opening = "<" + tag + ">"
        # the closing tag must not repeat the attributes
        closing = "</" + tag.split()[0] + ">"
        wrapped = []
        for item in marked:
            if isinstance(item, str):
                wrapped.append(opening + item + closing)
            else:
                wrapped.append(item)
        marked = wrapped
    return marked
class DEFAULT_TABLE_PREFERENCES(object):
    """Fallback preferences, used by tables built without a preferences argument.

    Instances only support read access by key (``prefs[key]``); an unknown
    key raises KeyError.
    """

    # SCOLAR_FONT is used for PDF; overridden by the preferences argument
    values = dict(
        SCOLAR_FONT="Helvetica",
        SCOLAR_FONT_SIZE=10,
        SCOLAR_FONT_SIZE_FOOT=6,
        bul_pdf_with_background=False,
    )

    def __getitem__(self, k):
        "Return the default value stored under key k"
        defaults = self.values
        return defaults[k]
class GenTable(object):
    """Simple 2D tables with export to HTML, PDF, Excel, text/CSV, XML and JSON.

    The table content is a list of dicts (one per row), each mapping a
    column id to the cell value. Can be sub-classed to generate fancy formats.

    Besides the column ids, the following reserved row keys are read
    (``<cid>`` stands for a column id):

    - ``row_title``, ``row_title_help``: optional title cell starting the row;
    - ``_hidden``: row skipped by get_data_list() when omit_hidden_lines is set;
    - ``_table_part``: "head", "body" (default) or "foot", used by html();
    - ``_css_row_class``, ``_tr_attrs``: class and raw attributes of the <tr>;
    - ``_<cid>_html``, ``_<cid>_pdf``, ``_<cid>_xls``: format-specific content;
    - ``_<cid>_help``, ``_<cid>_target``, ``_<cid>_link_class``, ``_<cid>_id``:
      link wrapped around the HTML cell content;
    - ``_<cid>_class``, ``_<cid>_td_attrs``, ``_<cid>_colspan``: cell attributes;
    - ``_pdf_row_markup``: list of tags applied to each string cell (PDF);
    - ``_pdf_style``: platypus style commands, with row-relative line numbers.
    """

    default_css_class = "gt_table stripe cell-border compact hover order-column"

    def __init__(
        self,
        rows=None,  # list of dict { column_id : value } (default: [{}])
        columns_ids=None,  # ids of the columns to display, in order (default: [])
        titles=None,  # titles (first line) (default: {})
        bottom_titles=None,  # titles of the last line (optional) (default: {})
        caption=None,
        page_title="",  # title of the html window
        pdf_link=True,
        xls_link=True,
        xml_link=False,
        table_id=None,  # for html and xml
        html_class=None,  # class of the <table> element (added to the default classes,
        html_class_ignore_default=False,  # unless html_class_ignore_default is true)
        html_sortable=False,
        html_highlight_n=2,  # one line out of 2 gets class "gt_hl"
        html_col_width=None,  # force column width
        html_generate_cells=True,  # generate empty <td> cells even if not in rows (useless?)
        html_title="",  # before the table, in html
        html_caption=None,  # override caption if specified
        html_header=None,
        html_next_section="",  # html fragment to put after the table
        html_with_td_classes=False,  # put class=column_id in each <td>
        html_before_table="",  # html snippet to put before the <table> in the page
        html_empty_element="",  # replace table when empty
        html_table_attrs="",  # for html
        base_url=None,
        origin=None,  # string added to excel and xml versions
        filename="table",  # filename, without extension
        xls_sheet_name="feuille",
        xls_before_table=None,  # list of cells to put before the table (default: [])
        pdf_title="",  # above the table, in pdf
        pdf_table_style=None,
        pdf_col_widths=None,
        xml_outer_tag="table",
        xml_row_tag="row",
        text_with_titles=False,  # CSV with header line
        text_fields_separator="\t",
        preferences=None,
    ):
        """Store the table content and the per-format rendering options.

        Container arguments left to None get a fresh empty value for each
        instance (``[{}]`` for rows, i.e. a single empty row), so that
        tables built with defaults never share state.
        Containers supplied by the caller are stored as is (not copied).
        When table_id is not given, a random one ("gt_<number>") is used.
        When preferences is not given, DEFAULT_TABLE_PREFERENCES is used.
        """
        self.rows = [{}] if rows is None else rows  # [ { col_id : value } ]
        # ordered list of col_id
        self.columns_ids = [] if columns_ids is None else columns_ids
        self.titles = {} if titles is None else titles  # { col_id : title }
        self.bottom_titles = {} if bottom_titles is None else bottom_titles
        self.origin = origin
        self.base_url = base_url
        self.filename = filename
        self.caption = caption
        self.html_header = html_header
        self.html_before_table = html_before_table
        self.html_empty_element = html_empty_element
        self.html_table_attrs = html_table_attrs
        self.page_title = page_title
        self.pdf_link = pdf_link
        self.xls_link = xls_link
        self.xml_link = xml_link
        # HTML parameters:
        if not table_id:  # random id
            self.table_id = "gt_" + str(random.randint(0, 1000000))
        else:
            self.table_id = table_id
        self.html_generate_cells = html_generate_cells
        self.html_title = html_title
        self.html_caption = html_caption
        self.html_next_section = html_next_section
        self.html_with_td_classes = html_with_td_classes
        if html_class is None:
            # no extra class: use the default classes once (not duplicated)
            self.html_class = self.default_css_class
        elif html_class_ignore_default:
            self.html_class = html_class
        else:
            self.html_class = self.default_css_class + " " + html_class
        self.sortable = html_sortable
        self.html_highlight_n = html_highlight_n
        self.html_col_width = html_col_width
        # XLS parameters
        self.xls_sheet_name = xls_sheet_name
        self.xls_before_table = [] if xls_before_table is None else xls_before_table
        # PDF parameters
        self.pdf_table_style = pdf_table_style
        self.pdf_col_widths = pdf_col_widths
        self.pdf_title = pdf_title
        # XML parameters
        self.xml_outer_tag = xml_outer_tag
        self.xml_row_tag = xml_row_tag
        # TEXT parameters
        self.text_fields_separator = text_fields_separator
        self.text_with_titles = text_with_titles
        #
        if preferences:
            self.preferences = preferences
        else:
            self.preferences = DEFAULT_TABLE_PREFERENCES()

    def __repr__(self):
        return f"<gen_table( nrows={self.get_nb_rows()}, ncols={self.get_nb_cols()} )>"

    def get_nb_cols(self):
        "Number of displayed columns (row titles not counted)"
        return len(self.columns_ids)

    def get_nb_rows(self):
        "Number of rows (titles not counted)"
        return len(self.rows)

    def is_empty(self):
        "True if the table has no row at all"
        return len(self.rows) == 0

    def get_data_list(
        self,
        with_titles=False,
        with_lines_titles=True,
        with_bottom_titles=True,
        omit_hidden_lines=False,
        pdf_mode=False,  # apply special pdf reportlab processing
        pdf_style_list=None,  # modified: list of platypus table style commands
        xls_mode=False,  # get xls content if available
    ) -> list:
        """Table data as a list of lists (rows).

        - with_titles: start with the titles line (if titles are defined);
        - with_lines_titles: start each line with its "row_title", if any;
        - with_bottom_titles: end with the bottom titles line (if defined);
        - omit_hidden_lines: skip rows with a true "_hidden" value;
        - pdf_mode: prefer "_<cid>_pdf" contents and apply "_pdf_row_markup";
        - xls_mode: prefer "_<cid>_xls" contents;
        - pdf_style_list: if given, this list is extended in place with the
          SPAN commands generated for "_<cid>_colspan" and with the relocated
          "_pdf_style" commands of each row.

        None contents are replaced by empty strings.
        """
        if pdf_style_list is None:
            # fresh list for each call: nothing accumulates between calls
            pdf_style_list = []
        T = []
        # NOTE(review): out_line_num counts data lines only; when a titles line
        # is emitted, style commands may be shifted by one row — confirm
        # against callers before changing.
        out_line_num = 0  # line number in output list
        if with_titles and self.titles:
            l = []
            if with_lines_titles and "row_title" in self.titles:
                l = [self.titles["row_title"]]
            T.append(l + [self.titles.get(cid, "") for cid in self.columns_ids])
        for row in self.rows:
            if omit_hidden_lines and row.get("_hidden", False):
                continue
            l = []
            if with_lines_titles and "row_title" in row:
                l = [row["row_title"]]
            col_num = len(l)
            for cid in self.columns_ids:
                if pdf_mode:
                    content = row.get(f"_{cid}_pdf", False) or row.get(cid, "")
                elif xls_mode:
                    content = row.get(f"_{cid}_xls", False) or row.get(cid, "")
                else:
                    content = row.get(cid, "")
                # Convert None to empty string ""
                content = "" if content is None else content
                colspan = row.get("_%s_colspan" % cid, 0)
                if colspan > 1:
                    pdf_style_list.append(
                        (
                            "SPAN",
                            (col_num, out_line_num),
                            (col_num + colspan - 1, out_line_num),
                        )
                    )
                # cells after a span are still emitted (not skipped)
                l.append(content)
                col_num += 1
            if pdf_mode:
                mk = row.get("_pdf_row_markup", [])  # a list of tags
                if mk:
                    l = mark_paras(l, mk)
            T.append(l)
            #
            for cmd in row.get("_pdf_style", []):  # relocate line numbers
                pdf_style_list.append(
                    (
                        cmd[0],
                        (cmd[1][0], cmd[1][1] + out_line_num),
                        (cmd[2][0], cmd[2][1] + out_line_num),
                    )
                    + tuple(cmd[3:])  # accept commands given as lists too
                )
            out_line_num += 1
        if with_bottom_titles and self.bottom_titles:
            l = []
            if with_lines_titles and "row_title" in self.bottom_titles:
                l = [self.bottom_titles["row_title"]]
            T.append(l + [self.bottom_titles.get(cid, "") for cid in self.columns_ids])
        return T

    def get_titles_list(self):
        "List of column titles, in columns order (empty string if no title)"
        return [self.titles.get(cid, "") for cid in self.columns_ids]

    def gen(self, format="html", columns_ids=None):
        """Build representation of the table in the specified format.
        See make_page() for more sophisticated output.

        format is one of "html", "xls", "xlsx", "text", "csv", "pdf",
        "xml", "json". Raises ValueError for any other value.
        The columns_ids argument is currently not used.
        """
        if format == "html":
            return self.html()
        elif format == "xls" or format == "xlsx":
            return self.excel()
        elif format == "text" or format == "csv":
            return self.text()
        elif format == "pdf":
            return self.pdf()
        elif format == "xml":
            return self.xml()
        elif format == "json":
            return self.json()
        raise ValueError(f"GenTable: invalid format: {format}")

    def _gen_html_row(self, row, line_num=0, elem="td", css_classes=""):
        """row is a dict, returns a string <tr...>...</tr>

        - line_num: used to highlight lines (class "gt_hl") according to
          html_highlight_n; a zero or None html_highlight_n disables it;
        - elem: cell element, "td" or "th";
        - css_classes: classes added to the <tr>.

        Cell contents and attributes are inserted as is (no HTML escaping).
        """
        if not row:
            return "<tr></tr>"  # empty row
        if self.html_col_width:
            std = ' style="width:%s;"' % self.html_col_width
        else:
            std = ""
        cla = css_classes + " " + row.get("_css_row_class", "")
        # guard against html_highlight_n == 0 (would divide by zero)
        if self.html_highlight_n and line_num % self.html_highlight_n:
            cls = ' class="gt_hl %s"' % cla
        elif cla:
            cls = ' class="%s"' % cla
        else:
            cls = ""
        H = ["<tr%s %s>" % (cls, row.get("_tr_attrs", ""))]
        # line title
        if "row_title" in row:
            content = str(row["row_title"])
            help_txt = row.get("row_title_help", "")
            if help_txt:
                content = '<a class="discretelink" href="" title="%s">%s</a>' % (
                    help_txt,
                    content,
                )
            H.append('<th class="gt_linetit">' + content + "</th>")
        r = []
        colspan_count = 0
        for cid in self.columns_ids:
            if cid not in row and not self.html_generate_cells:
                continue  # skip cell
            colspan_count -= 1
            if colspan_count > 0:
                continue  # skip cells after a span
            content = row.get("_" + str(cid) + "_html", row.get(cid, ""))
            if content is None:
                content = ""
            else:
                content = str(content)
            help_txt = row.get("_%s_help" % cid, "")
            # a help text alone still needs a link target
            if help_txt:
                target = row.get("_%s_target" % cid, "#")
            else:
                target = row.get("_%s_target" % cid, "")
            cell_id = row.get("_%s_id" % cid, None)
            if cell_id:
                idstr = ' id="%s"' % cell_id
            else:
                idstr = ""
            cell_link_class = row.get("_%s_link_class" % cid, "discretelink")
            if help_txt or target:
                content = '<a class="%s" href="%s" title="%s"%s>%s</a>' % (
                    cell_link_class,
                    target,
                    help_txt,
                    idstr,
                    content,
                )
            klass = row.get("_%s_class" % cid, "")
            if self.html_with_td_classes:
                c = cid
            else:
                c = ""
            if c or klass:
                klass = ' class="%s"' % (" ".join((klass, c)))
            else:
                klass = ""
            colspan = row.get("_%s_colspan" % cid, 0)
            if colspan > 1:
                colspan_txt = ' colspan="%d" ' % colspan
                colspan_count = colspan
            else:
                colspan_txt = ""
            r.append(
                "<%s%s %s%s%s>%s</%s>"
                % (
                    elem,
                    std,
                    row.get("_%s_td_attrs" % cid, ""),
                    klass,
                    colspan_txt,
                    content,
                    elem,
                )
            )
        H.append("".join(r) + "</tr>")
        return "".join(H)

    def html(self):
        """Simple HTML representation of the table.

        Rows are dispatched to <thead>, <tbody> or <tfoot> according to their
        "_table_part" value. The caption and, when base_url is set, the
        xls/pdf export links are put in a paragraph after the table.
        If the table is empty and html_empty_element is set, this element
        replaces the table.
        """
        if self.is_empty() and self.html_empty_element:
            return self.html_empty_element + "\n" + self.html_next_section
        hid = ' id="%s"' % self.table_id
        tablclasses = []
        if self.html_class:
            tablclasses.append(self.html_class)
        if self.sortable:
            tablclasses.append("sortable")
        if tablclasses:
            cls = ' class="%s"' % " ".join(tablclasses)
        else:
            cls = ""
        H = [self.html_before_table, f"<table{hid}{cls} {self.html_table_attrs}>"]
        line_num = 0
        # thead
        H.append("<thead>")
        if self.titles:
            H.append(
                self._gen_html_row(
                    self.titles, line_num, elem="th", css_classes="gt_firstrow"
                )
            )
        # other lines to put in the head:
        for row in self.rows:
            if row.get("_table_part") == "head":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))  # uses td elements
        H.append("</thead>")
        H.append("<tbody>")
        for row in self.rows:
            if row.get("_table_part", "body") == "body":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))
        H.append("</tbody>")
        H.append("<tfoot>")
        for row in self.rows:
            if row.get("_table_part") == "foot":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))
        if self.bottom_titles:
            H.append(
                self._gen_html_row(
                    self.bottom_titles,
                    line_num + 1,
                    elem="th",
                    css_classes="gt_lastrow sortbottom",
                )
            )
        H.append("</tfoot>")
        H.append("</table>")
        caption = self.html_caption or self.caption
        if caption or self.base_url:
            H.append('<p class="gt_caption">')
            if caption:
                H.append(caption)
            if self.base_url:
                H.append('<span class="gt_export_icons">')
                if self.xls_link:
                    H.append(
                        ' <a href="%s&format=xls">%s</a>'
                        % (self.base_url, scu.ICON_XLS)
                    )
                if self.xls_link and self.pdf_link:
                    H.append("&nbsp;")
                if self.pdf_link:
                    H.append(
                        ' <a href="%s&format=pdf">%s</a>'
                        % (self.base_url, scu.ICON_PDF)
                    )
                H.append("</span>")
            H.append("</p>")
        H.append(self.html_next_section)
        return "\n".join(H)

    def excel(self, wb=None):
        """Simple Excel representation of the table.

        If wb is None, builds a new sheet and returns the result of its
        generate() method.
        Otherwise, the sheet is created in wb and nothing is returned.
        """
        if wb is None:
            sheet = sco_excel.ScoExcelSheet(sheet_name=self.xls_sheet_name, wb=wb)
        else:
            sheet = wb.create_sheet(sheet_name=self.xls_sheet_name)
        sheet.rows += self.xls_before_table
        style_bold = sco_excel.excel_make_style(bold=True)
        style_base = sco_excel.excel_make_style()
        sheet.append_row(sheet.make_row(self.get_titles_list(), style_bold))
        for line in self.get_data_list(xls_mode=True):
            sheet.append_row(sheet.make_row(line, style_base))
        if self.caption:
            sheet.append_blank_row()  # empty line
            sheet.append_single_cell_row(self.caption, style_base)
        if self.origin:
            sheet.append_blank_row()  # empty line
            sheet.append_single_cell_row(self.origin, style_base)
        if wb is None:
            return sheet.generate()
        return None

    def text(self):
        """Raw text representation of the table: one line per row, fields
        joined with text_fields_separator. Starts with the titles line if
        text_with_titles is set.
        """
        if self.text_with_titles:
            headline = [self.get_titles_list()]
        else:
            headline = []
        return "\n".join(
            [
                self.text_fields_separator.join([str(x) for x in line])
                for line in headline + self.get_data_list()
            ]
        )

    def pdf(self):
        """PDF representation: returns a list of ReportLab's platypus objects
        (see _pdf()), built while holding sco_pdf.PDFLOCK.
        """
        # Acquire before entering the try block: if acquire() itself fails,
        # we must not release a lock we do not hold.
        sco_pdf.PDFLOCK.acquire()
        try:
            return self._pdf()
        finally:
            sco_pdf.PDFLOCK.release()

    def _pdf(self):
        """PDF representation: returns a list of ReportLab's platypus objects
        (notably a Table instance)

        Default values are stored in self.pdf_table_style and
        self.pdf_col_widths when these are not set.
        Raises ScoPDFFormatError if a cell content cannot be converted to a
        Paragraph (ValueError).
        """
        if not self.pdf_table_style:
            LINEWIDTH = 0.5
            self.pdf_table_style = [
                ("FONTNAME", (0, 0), (-1, 0), self.preferences["SCOLAR_FONT"]),
                ("LINEBELOW", (0, 0), (-1, 0), LINEWIDTH, Color(0, 0, 0)),
                ("GRID", (0, 0), (-1, -1), LINEWIDTH, Color(0, 0, 0)),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
            ]
        nb_cols = len(self.columns_ids)
        # one more column if lines start with a title cell
        if self.rows and "row_title" in self.rows[0]:
            nb_cols += 1
        if not self.pdf_col_widths:
            self.pdf_col_widths = (None,) * nb_cols
        #
        CellStyle = styles.ParagraphStyle({})
        CellStyle.fontSize = self.preferences["SCOLAR_FONT_SIZE"]
        CellStyle.fontName = self.preferences["SCOLAR_FONT"]
        CellStyle.leading = 1.0 * self.preferences["SCOLAR_FONT_SIZE"]  # vertical space
        #
        pdf_style_list = []
        data_list = self.get_data_list(
            pdf_mode=True,
            pdf_style_list=pdf_style_list,
            with_titles=True,
            omit_hidden_lines=True,
        )
        try:
            Pt = []
            for line in data_list:
                Pt.append(
                    [
                        Paragraph(SU(str(x)), CellStyle)
                        if (not isinstance(x, Paragraph))
                        else x
                        for x in line
                    ]
                )
        except ValueError as exc:
            raise ScoPDFFormatError(str(exc)) from exc
        # per-row commands first, then the table-wide style
        pdf_style_list += self.pdf_table_style
        T = Table(Pt, repeatRows=1, colWidths=self.pdf_col_widths, style=pdf_style_list)
        objects = []
        StyleSheet = styles.getSampleStyleSheet()
        if self.pdf_title:
            objects.append(Paragraph(SU(self.pdf_title), StyleSheet["Heading3"]))
        if self.caption:
            objects.append(Paragraph(SU(self.caption), StyleSheet["Normal"]))
        objects.append(Spacer(0, 0.4 * cm))
        objects.append(T)
        return objects

    def xml(self):
        """XML representation of the table.
        The schema is very simple:
        <table origin="" id="" caption="">
        <row title="">
        <column_id value=""/>
        </row>
        </table>
        The tag names <table> and <row> can be changed using
        xml_outer_tag and xml_row_tag
        """
        doc = ElementTree.Element(
            self.xml_outer_tag,
            id=str(self.table_id),
            origin=self.origin or "",
            caption=self.caption or "",
        )
        for row in self.rows:
            x_row = ElementTree.Element(self.xml_row_tag)
            row_title = row.get("row_title", "")
            if row_title:
                # attribute values must be strings to be serialized
                x_row.set("title", str(row_title))
            doc.append(x_row)
            for cid in self.columns_ids:
                v = row.get(cid, "")
                if v is None:
                    v = ""
                x_cell = ElementTree.Element(str(cid), value=str(v))
                x_row.append(x_cell)
        return sco_xml.XML_HEADER + ElementTree.tostring(doc).decode(scu.SCO_ENCODING)

    def json(self):
        """JSON representation of the table: a list of objects (one per row)
        restricted to the displayed columns. Missing values are null.
        """
        d = []
        for row in self.rows:
            r = {}
            for cid in self.columns_ids:
                r[cid] = row.get(cid, None)
            d.append(r)
        return json.dumps(d, cls=scu.ScoDocJSONEncoder)

    def make_page(
        self,
        title="",
        format="html",
        page_title="",
        filename=None,
        javascripts=None,
        with_html_headers=True,
        publish=True,
        init_qtip=False,
    ):
        """
        Build page at given format
        This is a simple page with only a title and the table.
        If not publish, does not set response header

        - filename defaults to the table's filename;
        - javascripts: list passed to the html header (default: empty list);
        - "html" and "text" formats return a string whatever publish is;
        - "csv" format is always sent with scu.send_file;
        - "pdf", "xls"/"xlsx", "xml" and "json" are sent with scu.send_file
          if publish, else the raw document is returned.

        Raises ValueError on unknown format.
        """
        if javascripts is None:
            javascripts = []
        if not filename:
            filename = self.filename
        page_title = page_title or self.page_title
        html_title = self.html_title or title
        if format == "html":
            H = []
            if with_html_headers:
                H.append(
                    self.html_header
                    or html_sco_header.sco_header(
                        page_title=page_title,
                        javascripts=javascripts,
                        init_qtip=init_qtip,
                    )
                )
            if html_title:
                H.append(html_title)
            H.append(self.html())
            if with_html_headers:
                H.append(html_sco_header.sco_footer())
            return "\n".join(H)
        elif format == "pdf":
            pdf_objs = self.pdf()
            pdf_doc = sco_pdf.pdf_basic_page(
                pdf_objs, title=title, preferences=self.preferences
            )
            if publish:
                return scu.send_file(
                    pdf_doc,
                    filename,
                    suffix=".pdf",
                    mime=scu.PDF_MIMETYPE,
                )
            else:
                return pdf_doc
        elif format == "xls" or format == "xlsx":  # both cases return xlsx
            xls = self.excel()
            if publish:
                return scu.send_file(
                    xls,
                    filename,
                    suffix=scu.XLSX_SUFFIX,
                    mime=scu.XLSX_MIMETYPE,
                )
            else:
                return xls
        elif format == "text":
            return self.text()
        elif format == "csv":
            return scu.send_file(
                self.text(),
                filename,
                suffix=".csv",
                mime=scu.CSV_MIMETYPE,
                attached=True,
            )
        elif format == "xml":
            xml = self.xml()
            if publish:
                return scu.send_file(
                    xml, filename, suffix=".xml", mime=scu.XML_MIMETYPE
                )
            return xml
        elif format == "json":
            js = self.json()
            if publish:
                return scu.send_file(
                    js, filename, suffix=".json", mime=scu.JSON_MIMETYPE
                )
            return js
        else:
            log("make_page: format=%s" % format)
            raise ValueError("_make_page: invalid format")
# -----
class SeqGenTable(object):
    """Sequence of GenTable: used to generate an Excel workbook with one tab
    per table.
    The order of the tabs is kept (first tab == first added table).
    """

    def __init__(self):
        # name -> GenTable, in insertion order
        self.genTables = OrderedDict()

    def add_genTable(self, name, gentable):
        "Register gentable under name (replaces a table with the same name)"
        tables = self.genTables
        tables[name] = gentable

    def get_genTable(self, name):
        "The table registered under name, or None"
        tables = self.genTables
        return tables[name] if name in tables else None

    def excel(self):
        """Export all the tables in a single Excel file, one sheet per table"""
        book = sco_excel.ScoExcelBook()  # pylint: disable=no-member
        for gt in self.genTables.values():
            gt.excel(wb=book)  # writes a sheet in the workbook
        return book.generate()
# ----- Minimal usage example.
if __name__ == "__main__":
    T = GenTable(
        rows=[{"nom": "Hélène", "age": 26}, {"nom": "Titi&çà§", "age": 21}],
        columns_ids=("nom", "age"),
    )
    print("--- HTML:")
    print(T.gen(format="html"))
    print("\n--- XML:")
    print(T.gen(format="xml"))
    print("\n--- JSON:")
    print(T.gen(format="json"))
    # Test pdf:
    import io
    from reportlab.platypus import KeepInFrame
    from app.scodoc import sco_preferences, sco_pdf

    preferences = sco_preferences.SemPreferences()
    T.preferences = preferences
    # First way: build the document ourselves from the platypus objects
    objects = T.gen(format="pdf")
    objects = [KeepInFrame(0, 0, objects, mode="shrink")]
    doc = io.BytesIO()
    document = sco_pdf.BaseDocTemplate(doc)
    document.addPageTemplates(
        sco_pdf.ScoDocPageTemplate(
            document,
        )
    )
    document.build(objects)
    data = doc.getvalue()
    with open("/tmp/gen_table.pdf", "wb") as f:
        f.write(data)
    # Second way: let make_page build the page.
    # publish=False: we want the document itself to write it to a file,
    # not the result of scu.send_file().
    p = T.make_page(format="pdf", publish=False)
    with open("toto.pdf", "wb") as f:
        f.write(p)