1
0
forked from ScoDoc/ScoDoc
ScoDoc/app/scodoc/gen_tables.py

826 lines
29 KiB
Python

# -*- mode: python -*-
# -*- coding: utf-8 -*-
##############################################################################
#
# Gestion scolarite IUT
#
# Copyright (c) 1999 - 2024 Emmanuel Viennet. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################
"""Géneration de tables aux formats XHTML, PDF, Excel, XML et JSON.
Les données sont fournies comme une liste de dictionnaires, chaque élément de
cette liste décrivant une ligne du tableau.
Chaque colonne est identifiée par une clé du dictionnaire.
Voir exemple en fin de ce fichier.
Les clés commençant par '_' sont réservées. Certaines altèrent le traitement, notamment
pour spécifier les styles de mise en forme.
Par exemple, la clé '_css_row_class' spécifie le style CSS de la ligne.
"""
import random
from collections import OrderedDict
from xml.etree import ElementTree
import json
from typing import Any
from urllib.parse import urlparse, urlencode, parse_qs, urlunparse
from openpyxl.utils import get_column_letter
import reportlab
from reportlab.platypus import Paragraph, Spacer
from reportlab.platypus import Table, KeepInFrame
from reportlab.lib.colors import Color
from reportlab.lib import styles
from reportlab.lib.units import cm
from flask import render_template
from app.scodoc import html_sco_header
from app.scodoc import sco_utils as scu
from app.scodoc import sco_excel
from app.scodoc import sco_pdf
from app.scodoc import sco_xml
from app.scodoc.sco_exceptions import ScoPDFFormatError
from app.scodoc.sco_pdf import SU
from app import log, ScoDocJSONEncoder
def mark_paras(items: list[Any], tags: list[str]) -> list[str]:
    """Wrap every string of *items* in each of the given markup *tags*.

    Tags are applied in order, so the first tag ends up innermost. A tag may
    carry attributes (e.g. ``'font color="red"'``): the closing tag uses only
    its first word. Elements that are not strings are returned unchanged.
    """
    wrapped = items
    for tag in tags:
        opening = "<" + tag + ">"
        closing = "</" + tag.split()[0] + ">"
        wrapped = [
            "".join((opening, elem, closing)) if isinstance(elem, str) else elem
            for elem in wrapped
        ]
    return wrapped
def add_query_param(url: str, key: str, value: str) -> str:
    """Return *url* with the query parameter ``key=value`` added.

    If *key* is already present in the query string, all its values are
    replaced by *value*. Other parameters are kept, including those with an
    empty value (e.g. ``?flag=``). The other URL components (scheme, host,
    path, fragment) are left as parsed.
    """
    parsed_url = urlparse(url)
    # keep_blank_values: do not silently drop existing parameters such as "a="
    query_params = parse_qs(parsed_url.query, keep_blank_values=True)
    query_params[key] = [value]
    # doseq: values are lists, encode each element as a separate key=value pair
    encoded_query = urlencode(query_params, doseq=True)
    return urlunparse(parsed_url._replace(query=encoded_query))
class DEFAULT_TABLE_PREFERENCES:
    """Fallback preferences, used by tables built without a preferences argument.

    Only supports item access (``prefs[name]``); an unknown name raises KeyError.
    """

    values = dict(
        SCOLAR_FONT="Helvetica",  # used for PDF, overridden by preferences argument
        SCOLAR_FONT_SIZE=10,
        SCOLAR_FONT_SIZE_FOOT=6,
        bul_pdf_with_background=False,
    )

    def __getitem__(self, name):
        return self.values[name]
class GenTable:
    """Simple 2D tables with export to HTML, PDF, Excel, CSV, XML and JSON.

    A table is a list of rows, each row being a dict ``{column_id: value}``;
    ``columns_ids`` gives the ordered list of the columns to output.

    Reserved row keys read by this class (``<cid>`` stands for a column id):

    - ``row_title``, ``row_title_help``: title cell of the row;
    - ``_hidden``: row skipped when hidden lines are omitted (PDF);
    - ``_table_part``: ``"head"``, ``"body"`` (default) or ``"foot"`` (HTML);
    - ``_css_row_class``, ``_tr_attrs``: class and attributes of the ``<tr>``;
    - ``_<cid>_html``, ``_<cid>_pdf``, ``_<cid>_xls``: per-format cell content;
    - ``_<cid>_help``, ``_<cid>_target``, ``_<cid>_link_class``, ``_<cid>_id``:
      link wrapped around the HTML cell content;
    - ``_<cid>_class``, ``_<cid>_td_attrs``, ``_<cid>_order``: HTML cell attributes;
    - ``_<cid>_colspan``: number of columns spanned by the cell;
    - ``_pdf_row_markup``, ``_pdf_style``: PDF markup tags and style commands.

    Can be sub-classed to generate fancy formats.
    """

    default_css_class = "gt_table stripe cell-border compact hover order-column"

    def __init__(
        self,
        rows=None,  # list of dicts { column_id : value }; defaults to [{}]
        columns_ids=None,  # ids of the columns to output, in order; defaults to []
        titles=None,  # titles (first line); defaults to {}
        bottom_titles=None,  # titles of the last line (optional); defaults to {}
        caption=None,
        page_title="",  # title of the html window
        pdf_link=True,
        xls_link=True,
        xml_link=False,
        table_id=None,  # for html and xml
        html_class=None,  # class of the <table> element (added to the default classes,
        html_class_ignore_default=False,  # unless html_class_ignore_default is true)
        html_sortable=False,
        html_highlight_n=2,  # rows whose number is not a multiple of n get class "gt_hl"
        html_col_width=None,  # force column width
        html_generate_cells=True,  # generate empty <td> cells even if not in rows (useless?)
        html_title="",  # before the table in html
        html_caption=None,  # override caption if specified
        html_header=None,
        html_next_section="",  # html fragment to put after the table
        html_with_td_classes=False,  # put class=column_id in each <td>
        html_before_table="",  # html snippet to put before the <table> in the page
        html_empty_element="",  # replace table when empty
        html_table_attrs="",  # for html
        base_url=None,
        origin=None,  # string added to excel and xml versions
        filename="table",  # filename, without extension
        xls_sheet_name="feuille",
        xls_before_table=None,  # list of cells to put before the table; defaults to []
        xls_style_base=None,  # excel style for the cells
        xls_columns_width=None,  # { col_id : width in "excel pixels" }
        pdf_title="",  # above the table in pdf
        pdf_table_style=None,
        pdf_col_widths=None,
        xml_outer_tag="table",
        xml_row_tag="row",
        text_with_titles=False,  # CSV with header line
        text_fields_separator="\t",
        preferences=None,
    ):
        """Store the table content and the rendering options of each format.

        Container arguments left to None get a fresh empty container (and
        ``rows`` a single empty row), so that instances never share state.
        When ``table_id`` is not given, a warning is logged and a random id
        is generated. When ``preferences`` is not given,
        DEFAULT_TABLE_PREFERENCES is used.
        """
        # Fresh containers for each instance (no shared mutable defaults)
        self.rows = [{}] if rows is None else rows  # [ { col_id : value } ]
        # ordered list of col_id
        self.columns_ids = [] if columns_ids is None else columns_ids
        self.titles = {} if titles is None else titles  # { col_id : title }
        self.bottom_titles = {} if bottom_titles is None else bottom_titles
        self.origin = origin
        self.base_url = base_url
        self.filename = filename
        self.caption = caption
        self.html_header = html_header
        self.html_before_table = html_before_table
        self.html_empty_element = html_empty_element
        self.html_table_attrs = html_table_attrs
        self.page_title = page_title
        self.pdf_link = pdf_link
        self.xls_link = xls_link
        self.xls_style_base = xls_style_base
        self.xls_columns_width = xls_columns_width or {}
        self.xml_link = xml_link
        # HTML parameters:
        if not table_id:  # random id
            log("Warning: GenTable() called without table_id")
            self.table_id = "gt_" + str(random.randint(0, 1000000))
        else:
            self.table_id = table_id
        self.html_generate_cells = html_generate_cells
        self.html_title = html_title
        self.html_caption = html_caption
        self.html_next_section = html_next_section
        self.html_with_td_classes = html_with_td_classes
        if html_class is None:
            # no extra class: use the default classes only (once)
            self.html_class = self.default_css_class
        elif html_class_ignore_default:
            self.html_class = html_class
        else:
            self.html_class = self.default_css_class + " " + html_class
        self.sortable = html_sortable
        self.html_highlight_n = html_highlight_n
        self.html_col_width = html_col_width
        # XLS parameters
        self.xls_sheet_name = xls_sheet_name
        self.xls_before_table = [] if xls_before_table is None else xls_before_table
        # PDF parameters
        self.pdf_table_style = pdf_table_style
        self.pdf_col_widths = pdf_col_widths
        self.pdf_title = pdf_title
        # XML parameters
        self.xml_outer_tag = xml_outer_tag
        self.xml_row_tag = xml_row_tag
        # TEXT parameters
        self.text_fields_separator = text_fields_separator
        self.text_with_titles = text_with_titles
        #
        if preferences:
            self.preferences = preferences
        else:
            self.preferences = DEFAULT_TABLE_PREFERENCES()

    def __repr__(self):
        return f"<gen_table( nrows={self.get_nb_rows()}, ncols={self.get_nb_cols()} )>"

    def __len__(self):
        "Number of rows"
        return len(self.rows)

    def get_nb_cols(self):
        "Number of columns (length of columns_ids)"
        return len(self.columns_ids)

    def get_nb_rows(self):
        "Number of rows"
        return len(self.rows)

    def is_empty(self):
        "True if the table has no row"
        return len(self.rows) == 0

    def get_data_list(
        self,
        with_titles=False,
        with_lines_titles=True,
        with_bottom_titles=True,
        omit_hidden_lines=False,
        pdf_mode=False,  # apply special pdf reportlab processing
        pdf_style_list=None,  # modified: list of platypus table style commands
        xls_mode=False,  # get xls content if available
    ) -> list:
        """Table data as a list of lists (rows).

        - with_titles: prepend the titles line (if the table has titles).
        - with_lines_titles: start each line with its "row_title", if any.
        - with_bottom_titles: append the bottom titles line, if any.
        - omit_hidden_lines: skip rows having a true "_hidden" value.
        - pdf_mode: use "_<cid>_pdf" cell contents when present and apply
          the row's "_pdf_row_markup" tags to string cells.
        - pdf_style_list: output parameter; "SPAN" commands (for cells with a
          colspan) and the rows' "_pdf_style" commands are appended to it.
          When None, these commands are discarded.
        - xls_mode: use "_<cid>_xls" cell contents when present.
        """
        if pdf_style_list is None:
            pdf_style_list = []  # local sink, never shared between calls
        table_data = []
        # NOTE(review): out_line_num counts data rows only; it is not incremented
        # for the titles line emitted when with_titles is set, so the row indexes
        # of the style commands are relative to the first data row. Confirm that
        # this is what the producers of "_pdf_style" expect.
        out_line_num = 0  # line number in output list
        if with_titles and self.titles:
            line = []
            if with_lines_titles and "row_title" in self.titles:
                line = [self.titles["row_title"]]
            table_data.append(
                line + [self.titles.get(cid, "") for cid in self.columns_ids]
            )
        for row in self.rows:
            if omit_hidden_lines and row.get("_hidden", False):
                continue
            line = []
            if with_lines_titles and "row_title" in row:
                line = [row["row_title"]]
            col_num = len(line)
            for cid in self.columns_ids:
                if pdf_mode and f"_{cid}_pdf" in row:
                    content = row[f"_{cid}_pdf"]
                elif xls_mode and f"_{cid}_xls" in row:
                    content = row[f"_{cid}_xls"]
                else:
                    content = row.get(cid, "")
                    # Convert None to empty string ""
                    content = "" if content is None else content
                colspan = row.get(f"_{cid}_colspan", 0)
                if colspan > 1:
                    pdf_style_list.append(
                        (
                            "SPAN",
                            (col_num, out_line_num),
                            (col_num + colspan - 1, out_line_num),
                        )
                    )
                # cells following a span are still emitted
                line.append(content)
                col_num += 1
            if pdf_mode:
                markup_tags = row.get("_pdf_row_markup", [])  # a list of tags
                if markup_tags:
                    line = mark_paras(line, markup_tags)
            table_data.append(line)
            #
            for cmd in row.get("_pdf_style", []):  # relocate line numbers
                pdf_style_list.append(
                    (
                        cmd[0],
                        (cmd[1][0], cmd[1][1] + out_line_num),
                        (cmd[2][0], cmd[2][1] + out_line_num),
                    )
                    + cmd[3:]
                )
            out_line_num += 1
        if with_bottom_titles and self.bottom_titles:
            line = []
            if with_lines_titles and "row_title" in self.bottom_titles:
                line = [self.bottom_titles["row_title"]]
            table_data.append(
                line + [self.bottom_titles.get(cid, "") for cid in self.columns_ids]
            )
        return table_data

    def get_titles_list(self):
        """List of the column titles, in columns_ids order.

        The title of the rows titles column ("row_title") comes first, when
        defined and not already one of the columns.
        """
        titles = [self.titles.get(cid, "") for cid in self.columns_ids]
        if "row_title" in self.titles and "row_title" not in self.columns_ids:
            titles.insert(0, self.titles["row_title"])
        return titles

    def gen(self, fmt="html", columns_ids=None):
        """Build representation of the table in the specified format.

        fmt is one of "html", "xls", "xlsx", "text", "csv", "pdf", "xml",
        "json"; any other value raises ValueError.
        The columns_ids argument is accepted but currently unused.
        See make_page() for more sophisticated output.
        """
        if fmt == "html":
            return self.html()
        if fmt in ("xls", "xlsx"):
            return self.excel()
        if fmt in ("text", "csv"):
            return self.text()
        if fmt == "pdf":
            return self.pdf()
        if fmt == "xml":
            return self.xml()
        if fmt == "json":
            return self.json()
        raise ValueError(f"GenTable: invalid format: {fmt}")

    def _gen_html_row(self, row, line_num=0, elem="td", css_classes=""):
        """Build the HTML of one line.

        row is a dict, returns a string <tr...>...</tr>.
        elem is the tag used for the cells ("td" or "th"), css_classes is
        added to the classes of the <tr> element.
        Cell contents, help texts and attributes are inserted as is
        (no HTML escaping is performed here).
        """
        if not row:
            return "<tr></tr>"  # empty row
        if self.html_col_width:
            std = ' style="width:%s;"' % self.html_col_width
        else:
            std = ""
        cla = css_classes + " " + row.get("_css_row_class", "")
        # html_highlight_n == 0 (or None) disables highlighting
        if self.html_highlight_n and line_num % self.html_highlight_n:
            cls = ' class="gt_hl %s"' % cla
        else:
            if cla:
                cls = ' class="%s"' % cla
            else:
                cls = ""
        H = ["<tr%s %s>" % (cls, row.get("_tr_attrs", ""))]
        # row title
        if "row_title" in row:
            content = str(row["row_title"])
            title_help = row.get("row_title_help", "")
            if title_help:
                content = '<a class="discretelink" href="" title="%s">%s</a>' % (
                    title_help,
                    content,
                )
            H.append('<th class="gt_linetit">' + content + "</th>")
        cells = []
        colspan_count = 0
        for cid in self.columns_ids:
            if cid not in row and not self.html_generate_cells:
                continue  # skip cell
            colspan_count -= 1
            if colspan_count > 0:
                continue  # skip cells after a span
            content = row.get("_" + str(cid) + "_html", row.get(cid, ""))
            if content is None:
                content = ""
            else:
                content = str(content)
            cell_help = row.get("_%s_help" % cid, "")
            # a cell with a help text but no target still gets a link (to "#")
            if cell_help:
                target = row.get("_%s_target" % cid, "#")
            else:
                target = row.get("_%s_target" % cid, "")
            cell_id = row.get("_%s_id" % cid, None)
            if cell_id:
                idstr = ' id="%s"' % cell_id
            else:
                idstr = ""
            cell_link_class = row.get("_%s_link_class" % cid, "discretelink")
            if cell_help or target:
                content = '<a class="%s" href="%s" title="%s"%s>%s</a>' % (
                    cell_link_class,
                    target,
                    cell_help,
                    idstr,
                    content,
                )
            klass = row.get("_%s_class" % cid, "")
            if self.html_with_td_classes:
                c = cid
            else:
                c = ""
            if c or klass:
                klass = ' class="%s"' % (" ".join((klass, c)))
            else:
                klass = ""
            colspan = row.get("_%s_colspan" % cid, 0)
            if colspan > 1:
                colspan_txt = ' colspan="%d" ' % colspan
                colspan_count = colspan
            else:
                colspan_txt = ""
            attrs = row.get("_%s_td_attrs" % cid, "")
            order = row.get(f"_{cid}_order")
            if order:
                attrs += f' data-order="{order}"'
            cells.append(
                "<%s%s %s%s%s>%s</%s>"
                % (
                    elem,
                    std,
                    attrs,
                    klass,
                    colspan_txt,
                    content,
                    elem,
                )
            )
        H.append("".join(cells) + "</tr>")
        return "".join(H)

    def html(self):
        """Simple HTML representation of the table.

        Returns html_empty_element (followed by html_next_section) when the
        table is empty and html_empty_element is set. Otherwise returns the
        <table> element, followed by the caption and the export links
        (when base_url is set), then by html_next_section.
        """
        if self.is_empty() and self.html_empty_element:
            return self.html_empty_element + "\n" + self.html_next_section
        hid = ' id="%s"' % self.table_id
        tablclasses = []
        if self.html_class:
            tablclasses.append(self.html_class)
        if self.sortable:
            tablclasses.append("sortable")
        if tablclasses:
            cls = ' class="%s"' % " ".join(tablclasses)
        else:
            cls = ""
        H = [self.html_before_table, f"<table{hid}{cls} {self.html_table_attrs}>"]
        line_num = 0
        # thead
        H.append("<thead>")
        if self.titles:
            H.append(
                self._gen_html_row(
                    self.titles, line_num, elem="th", css_classes="gt_firstrow"
                )
            )
        # other rows to put in the table head:
        for row in self.rows:
            if row.get("_table_part") == "head":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))  # uses td elements
        H.append("</thead>")
        H.append("<tbody>")
        for row in self.rows:
            if row.get("_table_part", "body") == "body":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))
        H.append("</tbody>")
        H.append("<tfoot>")
        for row in self.rows:
            if row.get("_table_part") == "foot":
                line_num += 1
                H.append(self._gen_html_row(row, line_num))
        if self.bottom_titles:
            H.append(
                self._gen_html_row(
                    self.bottom_titles,
                    line_num + 1,
                    elem="th",
                    css_classes="gt_lastrow sortbottom",
                )
            )
        H.append("</tfoot>")
        H.append("</table>")
        caption = self.html_caption or self.caption
        if caption or self.base_url:
            H.append('<p class="gt_caption">')
            if caption:
                H.append(caption)
            if self.base_url:
                # links to the same page in other formats
                H.append('<span class="gt_export_icons">')
                if self.xls_link:
                    xls_url = add_query_param(self.base_url, "fmt", "xls")
                    H.append(f' <a href="{xls_url}">{scu.ICON_XLS}</a>')
                if self.xls_link and self.pdf_link:
                    H.append("&nbsp;")
                if self.pdf_link:
                    pdf_url = add_query_param(self.base_url, "fmt", "pdf")
                    H.append(f' <a href="{pdf_url}">{scu.ICON_PDF}</a>')
                H.append("</span>")
            H.append("</p>")
        H.append(self.html_next_section)
        return "\n".join(H)

    def excel(self, wb=None):
        """Simple Excel representation of the table.

        If wb is None, builds a stand-alone sheet and returns the result of
        its generate() method. Otherwise, creates a new sheet in the given
        workbook and returns None.
        """
        if wb is None:
            sheet = sco_excel.ScoExcelSheet(sheet_name=self.xls_sheet_name, wb=wb)
        else:
            sheet = wb.create_sheet(sheet_name=self.xls_sheet_name)
        sheet.rows += self.xls_before_table
        style_bold = sco_excel.excel_make_style(bold=True)
        style_base = self.xls_style_base or sco_excel.excel_make_style()
        sheet.append_row(sheet.make_row(self.get_titles_list(), style_bold))
        for line in self.get_data_list(xls_mode=True):
            sheet.append_row(sheet.make_row(line, style_base))
        if self.caption:
            sheet.append_blank_row()  # empty line
            sheet.append_single_cell_row(self.caption, style_base)
        if self.origin:
            sheet.append_blank_row()  # empty line
            sheet.append_single_cell_row(self.origin, style_base)
        # Columns widths
        columns_ids = list(self.columns_ids)
        for col_id, width in self.xls_columns_width.items():
            try:
                idx = columns_ids.index(col_id)
                col = get_column_letter(idx + 1)
                sheet.set_column_dimension_width(col, width)
            except ValueError:
                pass  # width given for a column which is not in the table
        if wb is None:
            return sheet.generate()
        return None

    def text(self):
        """Raw text representation of the table.

        One line per row, cells separated by text_fields_separator; starts
        with the titles line if text_with_titles is set.
        """
        if self.text_with_titles:
            headline = [self.get_titles_list()]
        else:
            headline = []
        return "\n".join(
            self.text_fields_separator.join(str(x) for x in line)
            for line in headline + self.get_data_list()
        )

    def pdf(self) -> list:
        """PDF representation: returns a list of ReportLab's platypus objects.

        The objects are built while holding sco_pdf.PDFLOCK.
        """
        # Acquire outside the try block: if acquire() fails, the lock
        # must not be released.
        sco_pdf.PDFLOCK.acquire()
        try:
            return self._pdf()
        finally:
            sco_pdf.PDFLOCK.release()

    def _pdf(self) -> list:
        """PDF representation: returns a list of ReportLab's platypus objects
        (notably a Table instance).

        Sets self.pdf_table_style and self.pdf_col_widths to default values
        when they are not defined.
        Raises ScoPDFFormatError if a cell content cannot be converted
        to a Paragraph (ValueError).
        """
        LINEWIDTH = 0.5
        if not self.pdf_table_style:
            self.pdf_table_style = [
                ("FONTNAME", (0, 0), (-1, 0), self.preferences["SCOLAR_FONT"]),
                ("LINEBELOW", (0, 0), (-1, 0), LINEWIDTH, Color(0, 0, 0)),
                ("GRID", (0, 0), (-1, -1), LINEWIDTH, Color(0, 0, 0)),
                ("VALIGN", (0, 0), (-1, -1), "TOP"),
            ]
        nb_cols = len(self.columns_ids)
        if self.rows and "row_title" in self.rows[0]:
            nb_cols += 1  # one more column for the rows titles
        if not self.pdf_col_widths:
            self.pdf_col_widths = (None,) * nb_cols
        #
        CellStyle = styles.ParagraphStyle({})
        CellStyle.fontSize = self.preferences["SCOLAR_FONT_SIZE"]
        CellStyle.fontName = self.preferences["SCOLAR_FONT"]
        CellStyle.leading = 1.0 * self.preferences["SCOLAR_FONT_SIZE"]  # vertical space
        #
        pdf_style_list = []
        data_list = self.get_data_list(
            pdf_mode=True,
            pdf_style_list=pdf_style_list,
            with_titles=True,
            omit_hidden_lines=True,
        )
        try:
            # every cell which is not already a Paragraph is converted
            Pt = [
                [
                    x if isinstance(x, Paragraph) else Paragraph(SU(str(x)), CellStyle)
                    for x in line
                ]
                for line in data_list
            ]
        except ValueError as exc:
            raise ScoPDFFormatError(str(exc)) from exc
        pdf_style_list += self.pdf_table_style
        T = Table(Pt, repeatRows=1, colWidths=self.pdf_col_widths, style=pdf_style_list)
        objects = []
        StyleSheet = styles.getSampleStyleSheet()
        if self.pdf_title:
            objects.append(Paragraph(SU(self.pdf_title), StyleSheet["Heading3"]))
        if self.caption:
            objects.append(Paragraph(SU(self.caption), StyleSheet["Normal"]))
        objects.append(Spacer(0, 0.4 * cm))
        objects.append(T)
        return objects

    def xml(self):
        """XML representation of the table.

        The schema is very simple:
        <table origin="" id="" caption="">
        <row title="">
        <column_id value=""/>
        </row>
        </table>
        The tag names <table> and <row> can be changed using
        xml_outer_tag and xml_row_tag
        """
        doc = ElementTree.Element(
            self.xml_outer_tag,
            id=str(self.table_id),
            origin=self.origin or "",
            caption=self.caption or "",
        )
        for row in self.rows:
            x_row = ElementTree.Element(self.xml_row_tag)
            row_title = row.get("row_title", "")
            if row_title:
                # attribute values must be strings to be serialized
                x_row.set("title", str(row_title))
            doc.append(x_row)
            for cid in self.columns_ids:
                v = row.get(cid, "")
                if v is None:
                    v = ""
                x_cell = ElementTree.Element(str(cid), value=str(v))
                x_row.append(x_cell)
        return sco_xml.XML_HEADER + ElementTree.tostring(doc).decode(scu.SCO_ENCODING)

    def json(self):
        """JSON representation of the table: a list of objects (one per row)
        giving the value of each column (null when the row has no such key).
        """
        d = [{cid: row.get(cid, None) for cid in self.columns_ids} for row in self.rows]
        return json.dumps(d, cls=ScoDocJSONEncoder)

    def make_page(
        self,
        title="",
        fmt="html",
        page_title="",
        filename=None,
        javascripts=(),
        with_html_headers=True,
        publish=True,
    ):
        """
        Build page at given format
        This is a simple page with only a title and the table.
        If not publish, do not set response header for non HTML formats
        ("pdf", "xls", "xlsx", "xml", "json"): the raw document is returned
        instead of the result of scu.send_file(). The "csv" format is always
        sent using scu.send_file(), and "text" always returned as a string.
        If with_html_headers, render a full page using ScoDoc template.
        Raises ValueError if fmt is not a known format.
        """
        if not filename:
            filename = self.filename
        page_title = page_title or self.page_title
        html_title = self.html_title or title
        if fmt == "html":
            H = []
            if html_title:
                H.append(html_title)
            H.append(self.html())
            if with_html_headers:
                return render_template(
                    "sco_page.j2",
                    content="\n".join(H),
                    title=page_title,
                    javascripts=javascripts,
                )
            return "\n".join(H)
        if fmt == "pdf":
            pdf_objs = self.pdf()
            pdf_doc = sco_pdf.pdf_basic_page(
                pdf_objs, title=title, preferences=self.preferences
            )
            if publish:
                return scu.send_file(
                    pdf_doc,
                    filename,
                    suffix=".pdf",
                    mime=scu.PDF_MIMETYPE,
                )
            return pdf_doc
        if fmt in ("xls", "xlsx"):  # in both cases, returns xlsx
            xls = self.excel()
            if publish:
                return scu.send_file(
                    xls,
                    filename,
                    suffix=scu.XLSX_SUFFIX,
                    mime=scu.XLSX_MIMETYPE,
                )
            return xls
        if fmt == "text":
            return self.text()
        if fmt == "csv":
            return scu.send_file(
                self.text(),
                filename,
                suffix=".csv",
                mime=scu.CSV_MIMETYPE,
                attached=True,
            )
        if fmt == "xml":
            xml = self.xml()
            if publish:
                return scu.send_file(
                    xml, filename, suffix=".xml", mime=scu.XML_MIMETYPE
                )
            return xml
        if fmt == "json":
            js = self.json()
            if publish:
                return scu.send_file(
                    js, filename, suffix=".json", mime=scu.JSON_MIMETYPE
                )
            return js
        log(f"make_page: format={fmt}")
        raise ValueError("_make_page: invalid format")
# -----
class SeqGenTable:
    """Sequence of GenTable: used to build an Excel workbook with one tab per table.

    The order of the tabs is preserved (first tab == first table added).
    """

    def __init__(self):
        self.genTables = OrderedDict()

    def add_genTable(self, name, gentable):
        "Register gentable under the given name (replaces a table having the same name)"
        self.genTables[name] = gentable

    def get_genTable(self, name):
        "The table registered under this name, or None"
        return self.genTables.get(name)

    def excel(self):
        """Export all the tables to a single Excel workbook, one sheet per table"""
        workbook = sco_excel.ScoExcelBook()  # pylint: disable=no-member
        for gen_table in self.genTables.values():
            gen_table.excel(wb=workbook)  # adds a sheet to the workbook
        return workbook.generate()
# ----- Exemple d'utilisation minimal.
if __name__ == "__main__":
    # Minimal usage example: builds a small table and outputs it in several formats.
    table = GenTable(
        rows=[{"nom": "Hélène", "age": 26}, {"nom": "Titi&çà§", "age": 21}],
        columns_ids=("nom", "age"),
        table_id="gt_example",  # explicit id: avoids the "called without table_id" warning
    )
    print("--- HTML:")
    print(table.gen(fmt="html"))
    print("\n--- XML:")
    print(table.gen(fmt="xml"))
    print("\n--- JSON:")
    print(table.gen(fmt="json"))
    # Test pdf:
    import io
    from app.scodoc import sco_preferences

    preferences = sco_preferences.SemPreferences()
    table.preferences = preferences
    objects = table.gen(fmt="pdf")
    objects = [KeepInFrame(0, 0, objects, mode="shrink")]
    doc = io.BytesIO()
    document = sco_pdf.BaseDocTemplate(doc)
    document.addPageTemplates(
        sco_pdf.ScoDocPageTemplate(
            document,
        )
    )
    try:
        document.build(objects)
    except (ValueError, KeyError, reportlab.platypus.doctemplate.LayoutError) as exc:
        raise ScoPDFFormatError(str(exc)) from exc
    data = doc.getvalue()
    with open("/tmp/gen_table.pdf", "wb") as f:
        f.write(data)
    # publish=False: get the PDF document itself. With publish=True, make_page()
    # returns the result of scu.send_file() (presumably an HTTP response, which
    # cannot be written to a file as is).
    p = table.make_page(fmt="pdf", publish=False)
    with open("toto.pdf", "wb") as f:
        f.write(p)