Skip to content
174 changes: 101 additions & 73 deletions json2xml/dicttoxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

"""
Converts a Python dictionary or other native data type into a valid XML string.

Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other
number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and
dict-like objects) data types, with arbitrary nesting for the collections.
Expand Down Expand Up @@ -65,7 +64,6 @@ def get_xml_type(val):


def escape_xml(s: str) -> str:

if isinstance(s, str):
s = str(s) # avoid UnicodeDecodeError
s = s.replace("&", "&")
Expand Down Expand Up @@ -108,13 +106,17 @@ def make_valid_xml_name(key, attr: Dict[str, Any]):
return key, attr

# prepend a lowercase n if the key is numeric
if key.isdigit():
if isinstance(key, int) or key.isdigit():
return f"n{key}", attr

# replace spaces with underscores if that fixes the problem
if key_is_valid_xml(key.replace(" ", "_")):
return key.replace(" ", "_"), attr

# allow namespace prefixes + ignore @flat in key
if key_is_valid_xml(key.replace(":", "").replace("@flat", "")):
return key, attr

# key is still invalid - move it into a name attribute
attr["name"] = key
key = "key"
Expand All @@ -134,8 +136,10 @@ def default_item_func(parent):
def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"):
"""Routes the elements of an object to the right function to convert them
based on their data type"""

LOG.info(f'Inside convert(). obj type is: "{type(obj).__name__}", obj="{str(obj)}"')
LOG.info(f'Inside convert(). type(obj)="{type(obj).__name__}"')
# avoid cpu consuming object serialization => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' obj="{str(obj)}"')

item_name = item_func(parent)
# since bool is also a subtype of number.Number and int, the check for bool
Expand Down Expand Up @@ -171,18 +175,62 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"):
raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})")


def is_primitive_type(val):
t = get_xml_type(val)
return t in {'str', 'int', 'float', 'bool', 'number', 'null'}


def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList):
keys_str = ', '.join(str(key) for key in item)
LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"')
# avoid cpu consuming object serialization => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' item="{str(item)}"')

if attr_type:
attr["type"] = get_xml_type(item)
attr = item.pop("@attrs", attr) # update attr with custom @attr if exists
rawitem = item["@val"] if "@val" in item else item
if is_primitive_type(rawitem):
subtree = rawitem
else:
# we can not use convert_dict, because rawitem could be non-dict
subtree = convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name)
if item.get("@flat", False) or (parentIsList and not item_wrap):
return subtree
attrstring = make_attrstring(attr)
return f"<{item_name}{attrstring}>{subtree}</{item_name}>"


def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap):
if attr_type:
attr["type"] = get_xml_type(item)
flat = False
if item_name.endswith('@flat'):
item_name = item_name[0:-5]
flat = True
subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap)
if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap):
return subtree
attrstring = make_attrstring(attr)
return f"<{item_name}{attrstring}>{subtree}</{item_name}>"


def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap):
"""Converts a dict into an XML string."""
LOG.info(
f'Inside convert_dict(): obj type is: "{type(obj).__name__}", obj="{str(obj)}"'
)
keys_str = ', '.join(str(key) for key in obj)
LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"')
# avoid cpu consuming object serialization => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' obj="{str(obj)}"')

output = []
addline = output.append

for key, val in obj.items():
LOG.info(
f'Looping inside convert_dict(): key="{str(key)}", val="{str(val)}", type(val)="{type(val).__name__}"'
)
LOG.info(f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"')
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' val="{str(val)}"')

attr = {} if not ids else {"id": f"{get_unique_id(parent)}"}

Expand Down Expand Up @@ -214,31 +262,11 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap):
)

elif isinstance(val, dict):
if attr_type:
attr["type"] = get_xml_type(val)
dict_str = convert_dict(
val, ids, key, attr_type, item_func, cdata, item_wrap
)
attrstring = make_attrstring(attr)
addline(f"<{key}{attrstring}>{dict_str}</{key}>")

elif isinstance(val, collections.abc.Iterable) and val:
if attr_type:
attr["type"] = get_xml_type(val)
if (
isinstance(val[0], numbers.Number)
or isinstance(val[0], str)
and not item_wrap
):
addline(
convert_list(val, ids, key, attr_type, item_func, cdata, item_wrap)
)
else:
attrstring = make_attrstring(attr)
list_str = convert_list(
val, ids, key, attr_type, item_func, cdata, item_wrap
)
addline(f"<{key}{attrstring}>{list_str}</{key}>")
addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap, False))

elif isinstance(val, collections.abc.Iterable):
addline(list2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap))

elif not val:
addline(convert_none(key, val, attr_type, attr, cdata))

Expand All @@ -250,19 +278,27 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap):

def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap):
"""Converts a list into an XML string."""
LOG.info("Inside convert_list()")
LOG.info(f'Inside convert_list(): type(items)="{type(items).__name__}"')
# avoid cpu consuming object serialization => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' items="{str(items)}"')

output = []
addline = output.append

item_name = item_func(parent)
if item_name.endswith('@flat'):
item_name = item_name[:-5]
this_id = None
if ids:
this_id = get_unique_id(parent)

for i, item in enumerate(items):
LOG.info(
f'Looping inside convert_list(): item="{str(item)}", item_name="{item_name}", type="{type(item).__name__}"'
)
LOG.info(f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"')
# avoid cpu consuming object serialization => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' item="{str(item)}"')

attr = {} if not ids else {"id": f"{this_id}_{i + 1}"}

if isinstance(item, bool):
Expand Down Expand Up @@ -302,37 +338,10 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap):
)

elif isinstance(item, dict):
item_dict_str = convert_dict(
item,
ids,
parent,
attr_type,
item_func,
cdata,
item_wrap,
)
if not attr_type:
if item_wrap:
addline(f"<{item_name}>{item_dict_str}</{item_name}>")
else:
addline(f"{item_dict_str}")
else:
if item_wrap:
addline(f'<{item_name} type="dict">{item_dict_str}</{item_name}>')
else:
addline(f"{item_dict_str}")
addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, True))

elif isinstance(item, collections.abc.Iterable):
attrstring = make_attrstring(attr)
convert_list_str = convert_list(
item, ids, item_name, attr_type, item_func, cdata, item_wrap
)
if not attr_type:
addline(f"<{item_name} {attrstring}>{convert_list_str}</{item_name}>")
else:
addline(
f'<{item_name} type="list"{attrstring}>{convert_list_str}</{item_name}>'
)
addline(list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap))

elif item is None:
addline(convert_none(item_name, None, attr_type, attr, cdata))
Expand Down Expand Up @@ -391,6 +400,7 @@ def dicttoxml(
item_wrap=True,
item_func=default_item_func,
cdata=False,
xml_namespaces={}
):
"""Converts a python object into XML.
Arguments:
Expand All @@ -409,17 +419,35 @@ def dicttoxml(
Default is True
- cdata specifies whether string values should be wrapped in CDATA sections.
Default is False
- xml_namespaces is a dictionary where key is xmlns prefix and value the urn,
e.g. { 'flex': 'http://www.w3.org/flex/flexBase', 'xsl': "http://www.w3.org/1999/XSL/Transform"}
results in <root xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:flex="http://www.w3.org/flex/flexBase">
Default is {}

Dictionaries-keys with special char '@' has special meaning:
@attrs: This allows custom xml attributes: {'@attr':{'a':'b'}, 'x':'y'} results in <root a="b"><x>y</x></root>
@flat: If a key ends with @flat (or dict contains key '@flat'), encapsulating node is omitted. Similar to item_wrap.
@val: @attrs requires complex dict type. If primitive type should be used, then @val is used as key.
To add custom xml-attributes on a list {'list': [4, 5, 6]}, you do this:
{'list': {'@attrs': {'a':'b','c':'d'}, '@val': [4, 5, 6]}
which results in <list a="b" c="d"><item>4</item><item>5</item><item>6</item></list>
"""
LOG.info(
f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}", obj="{str(obj)}"'
)
LOG.info(f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}"')
# avoid cpu consuming object serialization (problem for large objects) => extra if
if LOG.getEffectiveLevel() <= logging.DEBUG:
LOG.debug(f' obj="{str(obj)}"')

output = []
namespacestr = ''
for prefix in xml_namespaces:
ns = xml_namespaces[prefix]
namespacestr += f' xmlns:{prefix}="{ns}"'
if root:
output.append('<?xml version="1.0" encoding="UTF-8" ?>')
output_elem = convert(
obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root
)
output.append(f"<{custom_root}>{output_elem}</{custom_root}>")
output.append(f"<{custom_root}{namespacestr}>{output_elem}</{custom_root}>")
else:
output.append(
convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="")
Expand Down
49 changes: 47 additions & 2 deletions tests/test_json2xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,59 @@ def test_read_boolean_data_from_json(self):
"""Test correct return for boolean types."""
data = readfromjson("examples/booleanjson.json")
result = json2xml.Json2xml(data).to_xml()
print(result)
dict_from_xml = xmltodict.parse(result)
print(dict_from_xml)
assert dict_from_xml["all"]["boolean"]["#text"] != 'True'
assert dict_from_xml["all"]["boolean"]["#text"] == 'true'
assert dict_from_xml["all"]["boolean_dict_list"]["item"][0]["boolean_dict"]["boolean"]["#text"] == 'true'
assert dict_from_xml["all"]["boolean_dict_list"]["item"][1]["boolean_dict"]["boolean"]["#text"] == 'false'
assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true'
assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false'

def test_read_boolean_data_from_json2(self):
"""Test correct return for boolean types."""
data = readfromjson("examples/booleanjson2.json")
result = json2xml.Json2xml(data).to_xml()
print(result)
dict_from_xml = xmltodict.parse(result)
assert dict_from_xml["all"]["item"][0]["#text"] != 'True'
assert dict_from_xml["all"]["item"][0]["#text"] == 'true'
print(dict_from_xml)
assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] != 'True'
assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true'
assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false'
assert dict_from_xml["all"]["number_array"]["item"][0]["#text"] == '1'
assert dict_from_xml["all"]["number_array"]["item"][1]["#text"] == '2'
assert dict_from_xml["all"]["number_array"]["item"][2]["#text"] == '3'
assert dict_from_xml["all"]["string_array"]["item"][0]["#text"] == 'a'
assert dict_from_xml["all"]["string_array"]["item"][1]["#text"] == 'b'
assert dict_from_xml["all"]["string_array"]["item"][2]["#text"] == 'c'

def test_dict2xml_with_namespaces(self):
data = {'ns1:node1': 'data in namespace 1', 'ns2:node2': 'data in namespace 2'}
namespaces = {'ns1': 'http://www.google.de/ns1', 'ns2': 'http://www.google.de/ns2'}
result = dicttoxml(data, attr_type=False, xml_namespaces=namespaces)
print(result)
assert b'<?xml version="1.0" encoding="UTF-8" ?>' \
b'<root xmlns:ns1="http://www.google.de/ns1" xmlns:ns2="http://www.google.de/ns2">' \
b'<ns1:node1>data in namespace 1</ns1:node1>' \
b'<ns2:node2>data in namespace 2</ns2:node2>' \
b'</root>' == result

def test_dict2xml_with_flat(self):
data = {'flat_list@flat': [1, 2, 3], 'non_flat_list': [4, 5, 6]}
result = dicttoxml(data, attr_type=False)
print(result)
assert b'<?xml version="1.0" encoding="UTF-8" ?>' \
b'<root><item>1</item><item>2</item><item>3</item>' \
b'<non_flat_list><item>4</item><item>5</item><item>6</item></non_flat_list>' \
b'</root>' == result

def test_dict2xml_with_val_and_custom_attr(self):
# in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key
data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1': 'myval1', 'myattr2': 'myval2'}, '@val': [4, 5, 6]}}
result = dicttoxml(data, attr_type=False)
print(result)
assert b'<?xml version="1.0" encoding="UTF-8" ?>' \
b'<root><list1><item>1</item><item>2</item><item>3</item></list1>' \
b'<list2 myattr1="myval1" myattr2="myval2"><item>4</item><item>5</item><item>6</item></list2>' \
b'</root>' == result