ScoDoc/app/scodoc/sco_xml.py

154 lines
5.0 KiB
Python
Raw Normal View History

2021-07-10 13:55:35 +02:00
# -*- mode: python -*-
# -*- coding: utf-8 -*-
##############################################################################
#
# Gestion scolarite IUT
#
2023-12-31 23:04:06 +01:00
# Copyright (c) 1999 - 2024 Emmanuel Viennet. All rights reserved.
2021-07-10 13:55:35 +02:00
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# Emmanuel Viennet emmanuel.viennet@viennet.net
#
##############################################################################
""" Exports XML
"""
from xml.etree import ElementTree
import xml.sax.saxutils
2021-08-01 15:17:33 +02:00
from xml.dom import minidom
2021-07-10 13:55:35 +02:00
2021-07-12 23:34:18 +02:00
from app.scodoc import sco_utils as scu
2021-07-10 13:55:35 +02:00
from app.scodoc.sco_vdi import ApoEtapeVDI
2021-07-12 23:34:18 +02:00
# XML declaration placed at the very start of the documents built by
# simple_dictlist2xml (UTF-8 is announced as the document encoding).
XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>"""
2021-07-10 13:55:35 +02:00
def remove_control_characters(s: str) -> str:
    """Return `s` stripped of ASCII control characters (code points below 32).

    Tab (9), line feed (10) and carriage return (13) are kept.
    The translation table is built on the first call and cached as an
    attribute of the function itself.
    """
    try:
        table = remove_control_characters.control_chars
    except AttributeError:
        # First call: map every unwanted code point to None (= delete).
        kept = (9, 10, 13)
        table = {code: None for code in range(32) if code not in kept}
        remove_control_characters.control_chars = table
    return s.translate(table)
2021-07-10 13:55:35 +02:00
def quote_xml_attr(data):
    """Convert `data` to a string and escape it for use in an XML attribute.

    The characters &, < and > are escaped, as well as single and double
    quotes (replaced by &apos; and &quot;).
    """
    quote_entities = {"'": "&apos;", '"': "&quot;"}
    text = str(data)
    return xml.sax.saxutils.escape(text, quote_entities)
# ScoDoc7 legacy function:
2021-08-01 15:17:33 +02:00
def simple_dictlist2xml(dictlist, tagname=None, quote=False, pretty=True):
    """Serialize a list of dicts as an XML string (ScoDoc7 legacy format).

    Each item of `dictlist` becomes one `tagname` element:
    - keys whose value is a string or a number become attributes
      (numbers are converted to strings);
    - keys whose value is a list become child elements, recursively,
      named after the key;
    - *** all other values are ignored ***

    When `quote` is true, attribute values are passed through
    quote_xml_attr before being set on the element.
    NOTE(review): ElementTree also escapes attribute values when it
    serializes, so quote=True presumably yields doubly-escaped entities;
    confirm whether legacy callers rely on this.

    When `pretty` is true the document is re-parsed and re-indented with
    tabs; if that parsing fails (for instance when `dictlist` yields several
    top-level elements, which is not a well-formed single document) the
    unindented string is returned instead.

    Example:
     simple_dictlist2xml([ { 'id' : 1, 'ues' : [{'note':10},{}] } ], tagname='infos')

    produces a document of the form

    <?xml version="1.0" encoding="utf-8"?>
    <infos id="1">
      <ues note="10" />
      <ues />
    </infos>

    Raises ValueError if `tagname` is empty or None.
    """
    if not tagname:
        raise ValueError("invalid empty tagname !")

    nodes = _dictlist2xml(dictlist, root=[], tagname=tagname, quote=quote)
    serialized = [ElementTree.tostring(node) for node in nodes]
    document = XML_HEADER + b"\n".join(serialized).decode(scu.SCO_ENCODING)
    if not pretty:
        return document

    # Not a very satisfying solution: the XML has to be parsed again.
    # The encode/decode round trip is there so that the output keeps the
    # <?xml version="1.0" encoding="utf-8"?> declaration.
    try:
        dom = minidom.parseString(document)
        return dom.toprettyxml(indent="\t", encoding="utf-8").decode("utf-8")
    except xml.parsers.expat.ExpatError:
        # Best effort: fall back to the unindented document.
        return document
2021-07-10 13:55:35 +02:00
def _repr_as_xml(v):
if isinstance(v, bool):
return str(int(v)) # booleans as "0" / "1"
return str(v)
2021-07-10 13:55:35 +02:00
def _dictlist2xml(dictlist, root=None, tagname=None, quote=False):
    """Build one `tagname` element per item of `dictlist` and append it to `root`.

    Args:
        dictlist: iterable of items to convert. An item is either a scalar
            (bytes, str, int, float, bool) or an ApoEtapeVDI, or a dict.
        root: container receiving the new elements through `append`
            (a list at top level, the parent Element when recursing).
            A new list is created when it is None.
        tagname: tag given to every element created at this level.
        quote: if true, attribute values taken from dicts are passed through
            quote_xml_attr before being set.

    Returns:
        `root`, with the new elements appended.

    Conversion rules:
    - a scalar / ApoEtapeVDI item gives an element with a single "code"
      attribute holding its string representation;
    - for a dict item, scalar values become attributes (set in sorted key
      order), list values become children named after their key
      (recursively), and every other value is ignored.
    """
    # Compare with None explicitly: an Element without children is falsy,
    # and it is a perfectly valid root when recursing.
    if root is None:
        root = []
    scalar_types = (bytes, str, int, float, bool)
    for d in dictlist:
        elem = ElementTree.Element(tagname)
        root.append(elem)
        if isinstance(d, (scalar_types, ApoEtapeVDI)):
            elem.set("code", _repr_as_xml(d))
            continue
        d_scalar = {
            k: _repr_as_xml(v) for (k, v) in d.items() if isinstance(v, scalar_types)
        }
        if quote:
            d_scalar = {k: quote_xml_attr(v) for (k, v) in d_scalar.items()}
        for k, v in sorted(d_scalar.items()):
            elem.set(k, v)
        # Children keep the insertion order of the dict.
        for k, v in d.items():
            if isinstance(v, list):
                _dictlist2xml(v, tagname=k, root=elem, quote=quote)
    return root
2021-07-13 09:38:31 +02:00
ELEMENT_NODE = 1
TEXT_NODE = 3


def xml_to_dicts(element):
    """Convert a DOM node into a nested (name, attributes, children) tuple.

    Only child nodes whose nodeType is ELEMENT_NODE are kept; they are
    converted recursively, in document order.

    Example:
       <foo x="1" y="2"><bar z="2"/></foo>
    gives:
       ('foo', {'y': '2', 'x': '1'}, [('bar', {'z': '2'}, [])])
    """
    # attributes
    attrs = {}
    attr_map = element.attributes
    if attr_map:  # skipped when the node has no attributes (None or empty)
        for index in range(len(attr_map)):
            name = attr_map.item(index).nodeName
            attrs[name] = element.getAttribute(name)
    # descendants
    children = [
        xml_to_dicts(node)
        for node in element.childNodes
        if node.nodeType == ELEMENT_NODE
    ]
    return (element.nodeName, attrs, children)