ScoDoc/app/scodoc/sco_xml.py

# -*- mode: python -*-
# -*- coding: utf-8 -*-

##############################################################################
#
# Gestion scolarite IUT
#
# Copyright (c) 1999 - 2024 Emmanuel Viennet.  All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#   Emmanuel Viennet      emmanuel.viennet@viennet.net
#
##############################################################################


""" Exports XML
"""

from xml.etree import ElementTree
import xml.sax.saxutils
from xml.dom import minidom

from app.scodoc import sco_utils as scu
from app.scodoc.sco_vdi import ApoEtapeVDI

# XML declaration placed in front of the serialized elements by simple_dictlist2xml()
XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>"""


def remove_control_characters(s: str) -> str:
    """Return ``s`` without ASCII control characters (code points below 32).

    Tab (9), line feed (10) and carriage return (13) are kept.
    The translation table is built on the first call and cached as the
    ``control_chars`` attribute of this function.
    """
    try:
        table = remove_control_characters.control_chars
    except AttributeError:
        # First call: map every unwanted code point to None (i.e. delete it)
        table = {code: None for code in range(32) if code not in (9, 10, 13)}
        remove_control_characters.control_chars = table
    return s.translate(table)


def quote_xml_attr(data):
    """Return ``str(data)`` with &, <, >, single and double quotes escaped as XML entities."""
    # saxutils.escape() handles &, < and > itself; quotes are supplied as extra entities
    quote_entities = {"'": "&apos;", '"': "&quot;"}
    text = str(data)
    return xml.sax.saxutils.escape(text, quote_entities)


# ScoDoc7 legacy function:
def simple_dictlist2xml(dictlist, tagname=None, quote=False, pretty=True):
    """Represent a list of dicts as an XML document string.

    Each item of ``dictlist`` becomes one ``tagname`` element:
    - keys with string, bytes or numeric values become attributes
      (values converted to strings, booleans as "0" / "1");
    - keys with list values become lists of child elements (recursively),
      the key being used as the child tag name;
    - *** all other values are ignored ! ***

    Args:
        dictlist: iterable of dicts (or scalars) to serialize.
        tagname: tag of the top-level elements; must not be empty.
        quote: when true, scalar values go through quote_xml_attr() before
            being set as attributes.
            NOTE(review): ElementTree.tostring() escapes attribute values on
            its own, so this presumably yields doubly-escaped entities;
            confirm that callers expect it.
        pretty: when true, re-parse the result with minidom and return an
            indented version (tabs). If re-parsing fails, the non-indented
            string is returned instead.

    Returns:
        The XML text, starting with the XML declaration.

    Raises:
        ValueError: if ``tagname`` is empty.

    Example:
     simple_dictlist2xml([ { 'id' : 1, 'ues' : [{'note':10},{}] } ], tagname='infos')

    <?xml version="1.0" encoding="utf-8"?>
    <infos id="1">
      <ues note="10" />
      <ues />
    </infos>

    """
    # Imported explicitly: this module never imports xml.parsers.expat, and the
    # except clause below would otherwise only resolve thanks to the import
    # performed as a side effect of minidom.parseString().
    from xml.parsers.expat import ExpatError

    if not tagname:
        raise ValueError("invalid empty tagname !")
    elements = _dictlist2xml(dictlist, root=[], tagname=tagname, quote=quote)
    body = b"\n".join(ElementTree.tostring(x) for x in elements)
    ans = XML_HEADER + body.decode(scu.SCO_ENCODING)
    if pretty:
        # Not fully satisfactory: the XML has to be parsed again.
        # The encode/decode round trip is needed to get the
        # <?xml version="1.0" encoding="utf-8"?> declaration in the output.
        try:
            ans = (
                minidom.parseString(ans)
                .toprettyxml(indent="\t", encoding="utf-8")
                .decode("utf-8")
            )
        except ExpatError:
            # Best effort: keep the non-indented text when it cannot be
            # re-parsed (e.g. several top-level elements).
            pass
    return ans


def _repr_as_xml(v):
    if isinstance(v, bool):
        return str(int(v))  # booleans as "0" / "1"
    return str(v)


def _dictlist2xml(dictlist, root=None, tagname=None, quote=False):
    scalar_types = (bytes, str, int, float, bool)
    for d in dictlist:
        elem = ElementTree.Element(tagname)
        root.append(elem)
        if isinstance(d, (scalar_types, ApoEtapeVDI)):
            elem.set("code", _repr_as_xml(d))
        else:
            if quote:
                d_scalar = {
                    k: quote_xml_attr(_repr_as_xml(v))
                    for (k, v) in d.items()
                    if isinstance(v, scalar_types)
                }
            else:
                d_scalar = {
                    k: _repr_as_xml(v)
                    for (k, v) in d.items()
                    if isinstance(v, scalar_types)
                }
            for k, v in sorted(d_scalar.items()):
                elem.set(k, v)
            d_list = {k: v for (k, v) in d.items() if isinstance(v, list)}
            if d_list:
                for k, v in d_list.items():
                    _dictlist2xml(v, tagname=k, root=elem, quote=quote)
    return root


# DOM node type codes (same numeric values as xml.dom.Node.ELEMENT_NODE / TEXT_NODE)
ELEMENT_NODE = 1  # compared with child.nodeType in xml_to_dicts()
TEXT_NODE = 3  # not referenced in the visible part of this module


def xml_to_dicts(element):
    """Convert a DOM element to a nested ``(name, attributes, children)`` tuple.

    ``attributes`` is a dict mapping attribute names to their values and
    ``children`` is the list of the same tuples built recursively for the
    child *element* nodes (other node types, e.g. text, are skipped).

    Example:
       <foo x="1" y="2"><bar z="2"/></foo>
    gives:
       ('foo', {'y': '2', 'x': '1'}, [('bar', {'z': '2'}, [])])
    """
    attributes = {}
    attr_map = element.attributes
    if attr_map:
        names = [attr_map.item(pos).nodeName for pos in range(len(attr_map))]
        attributes = {name: element.getAttribute(name) for name in names}
    children = [
        xml_to_dicts(node)
        for node in element.childNodes
        if node.nodeType == ELEMENT_NODE
    ]
    return (element.nodeName, attributes, children)
migration exports xml 2021-07-10 13:55:35 +02:00			`# -- mode: python --`
			`# -- coding: utf-8 --`

			`##############################################################################`
			`#`
			`# Gestion scolarite IUT`
			`#`
Version 9.6.73 + copyright 2024 2023-12-31 23:04:06 +01:00			`# Copyright (c) 1999 - 2024 Emmanuel Viennet. All rights reserved.`
migration exports xml 2021-07-10 13:55:35 +02:00			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation; either version 2 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program; if not, write to the Free Software`
			`# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA`
			`#`
			`# Emmanuel Viennet emmanuel.viennet@viennet.net`
			`#`
			`##############################################################################`


			`""" Exports XML`
			`"""`

			`from xml.etree import ElementTree`
			`import xml.sax.saxutils`
prettier xml exports 2021-08-01 15:17:33 +02:00			`from xml.dom import minidom`
migration exports xml 2021-07-10 13:55:35 +02:00
misc fixes 2021-07-12 23:34:18 +02:00			`from app.scodoc import sco_utils as scu`
migration exports xml 2021-07-10 13:55:35 +02:00			`from app.scodoc.sco_vdi import ApoEtapeVDI`

misc fixes 2021-07-12 23:34:18 +02:00			`XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>"""`
migration exports xml 2021-07-10 13:55:35 +02:00

Import XML formations: filtre caractères de contrôles erronés 2024-09-27 22:37:54 +02:00			`def remove_control_characters(s: str) -> str:`
			`"""supprime tous les caractères de contrôle (code < 32)`
			`sauf les tabs (9), les retours chariot (10 et les sauts de ligne (13).`
			`"""`
			`if not hasattr(remove_control_characters, "control_chars"):`
			`control_chars = dict.fromkeys(i for i in range(32) if i not in (9, 10, 13))`
			`remove_control_characters.control_chars = control_chars`
			`return s.translate(remove_control_characters.control_chars)`


migration exports xml 2021-07-10 13:55:35 +02:00			`def quote_xml_attr(data):`
			`"""Escape &, <, >, quotes and double quotes"""`
			`return xml.sax.saxutils.escape(str(data), {"'": "'", '"': """})`


			`# ScoDoc7 legacy function:`
prettier xml exports 2021-08-01 15:17:33 +02:00			`def simple_dictlist2xml(dictlist, tagname=None, quote=False, pretty=True):`
migration exports xml 2021-07-10 13:55:35 +02:00			`"""Represent a dict as XML data.`
			`All keys with string or numeric values are attributes (numbers converted to strings).`
			`All list values converted to list of childs (recursively).`
			`* all other values are ignored ! *`
			`Values (xml entities) are not quoted, except if requested by quote argument.`

			`Exemple:`
			`simple_dictlist2xml([ { 'id' : 1, 'ues' : [{'note':10},{}] } ], tagname='infos')`

			`<?xml version="1.0" encoding="utf-8"?>`
			`<infos id="1">`
			`<ues note="10" />`
			`<ues />`
			`</infos>`

			`"""`
			`if not tagname:`
			`raise ValueError("invalid empty tagname !")`
			`elements = _dictlist2xml(dictlist, root=[], tagname=tagname, quote=quote)`
prettier xml exports 2021-08-01 15:17:33 +02:00			`ans = XML_HEADER + b"\n".join([ElementTree.tostring(x) for x in elements]).decode(`
misc fixes 2021-07-12 23:34:18 +02:00			`scu.SCO_ENCODING`
			`)`
prettier xml exports 2021-08-01 15:17:33 +02:00			`if pretty:`
			`# solution peu satisfaisante car on doit reparser le XML`
			`# de plus, on encode/decode pour avoir le tag <?xml version="1.0" encoding="utf-8"?>`
modified to use pytest 2021-08-01 15:33:08 +02:00			`try:`
			`ans = (`
			`minidom.parseString(ans)`
			`.toprettyxml(indent="\t", encoding="utf-8")`
			`.decode("utf-8")`
			`)`
			`except xml.parsers.expat.ExpatError:`
			`pass`
prettier xml exports 2021-08-01 15:17:33 +02:00			`return ans`
migration exports xml 2021-07-10 13:55:35 +02:00

Sépare les UE externes dans la pae édition programme 2021-10-22 23:09:15 +02:00			`def _repr_as_xml(v):`
			`if isinstance(v, bool):`
			`return str(int(v)) # booleans as "0" / "1"`
			`return str(v)`


migration exports xml 2021-07-10 13:55:35 +02:00			`def _dictlist2xml(dictlist, root=None, tagname=None, quote=False):`
Sépare les UE externes dans la pae édition programme 2021-10-22 23:09:15 +02:00			`scalar_types = (bytes, str, int, float, bool)`
migration exports xml 2021-07-10 13:55:35 +02:00			`for d in dictlist:`
			`elem = ElementTree.Element(tagname)`
			`root.append(elem)`
Import XML formations: filtre caractères de contrôles erronés 2024-09-27 22:37:54 +02:00			`if isinstance(d, (scalar_types, ApoEtapeVDI)):`
Sépare les UE externes dans la pae édition programme 2021-10-22 23:09:15 +02:00			`elem.set("code", _repr_as_xml(d))`
migration exports xml 2021-07-10 13:55:35 +02:00			`else:`
			`if quote:`
Export formation: fix regression and sort xml attributes to get stable results 2024-10-17 17:07:24 +02:00			`d_scalar = {`
			`k: quote_xml_attr(_repr_as_xml(v))`
			`for (k, v) in d.items()`
			`if isinstance(v, scalar_types)`
			`}`
migration exports xml 2021-07-10 13:55:35 +02:00			`else:`
Export formation: fix regression and sort xml attributes to get stable results 2024-10-17 17:07:24 +02:00			`d_scalar = {`
			`k: _repr_as_xml(v)`
			`for (k, v) in d.items()`
			`if isinstance(v, scalar_types)`
			`}`
			`for k, v in sorted(d_scalar.items()):`
			`elem.set(k, v)`
			`d_list = {k: v for (k, v) in d.items() if isinstance(v, list)}`
migration exports xml 2021-07-10 13:55:35 +02:00			`if d_list:`
Version 9.6.73 + copyright 2024 2023-12-31 23:04:06 +01:00			`for k, v in d_list.items():`
migration exports xml 2021-07-10 13:55:35 +02:00			`_dictlist2xml(v, tagname=k, root=elem, quote=quote)`
			`return root`
misc fixes 2021-07-13 09:38:31 +02:00

			`ELEMENT_NODE = 1`
			`TEXT_NODE = 3`


			`def xml_to_dicts(element):`
			`"""Represent dom element as a dict`
			`Example:`
			`<foo x="1" y="2"><bar z="2"/></foo>`
			`will give us:`
			`('foo', {'y': '2', 'x': '1'}, [('bar', {'z': '2'}, [])])`
			`"""`
			`d = {}`
			`# attributes`
			`if element.attributes:`
			`for i in range(len(element.attributes)):`
			`a = element.attributes.item(i).nodeName`
			`v = element.getAttribute(element.attributes.item(i).nodeName)`
			`d[a] = v`
			`# descendants`
			`childs = []`
			`for child in element.childNodes:`
			`if child.nodeType == ELEMENT_NODE:`
			`childs.append(xml_to_dicts(child))`
Fix etud_info xml quote 2021-10-20 22:34:06 +02:00			`return (element.nodeName, d, childs)`