From 4412fea50d0d30de6aa6eb087c66923721cd3ac3 Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Wed, 13 Apr 2022 09:53:06 +0200 Subject: [PATCH 1/7] improvments on dicttoxml: - support of namespaces - moving full object serialization into loglevel debug (reduce cpu + out of mem on large objects) - more control parameters on xml generation: allow custom xml attributes via @attr + optionally omit encapsulating xml-nodes via @flat --- json2xml/dicttoxml.py | 151 ++++++++++++++++++++++-------------------- 1 file changed, 79 insertions(+), 72 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index d5e3f9f4..6382edad 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -2,7 +2,6 @@ """ Converts a Python dictionary or other native data type into a valid XML string. - Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and dict-like objects) data types, with arbitrary nesting for the collections. @@ -65,7 +64,6 @@ def get_xml_type(val): def escape_xml(s: str) -> str: - if isinstance(s, str): s = str(s) # avoid UnicodeDecodeError s = s.replace("&", "&") @@ -115,6 +113,10 @@ def make_valid_xml_name(key, attr: Dict[str, Any]): if key_is_valid_xml(key.replace(" ", "_")): return key.replace(" ", "_"), attr + # allow namespace prefixes + ignore @flat in key + if key_is_valid_xml(key.replace(":", "").replace("@flat", "")): + return key, attr + # key is still invalid - move it into a name attribute attr["name"] = key key = "key" @@ -134,8 +136,9 @@ def default_item_func(parent): def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): """Routes the elements of an object to the right function to convert them based on their data type""" - - LOG.info(f'Inside convert(). obj type is: "{type(obj).__name__}", obj="{str(obj)}"') + LOG.info(f'Inside convert(). type(obj)="{type(obj).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') item_name = item_func(parent) @@ -167,19 +170,49 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") +def is_primitive_type(val): + t = get_xml_type(val) + return t in {'str', 'int', 'float', 'bool', 'number', 'null'} + +def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): + keys_str = ', '.join(key for key in item) + LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') + + if attr_type: + attr["type"] = get_xml_type(item) + attr = item.pop("@attrs", attr) # update attr with custom @attr if exists + rawitem = item["@val"] if "@val" in item else item + subtree = rawitem if is_primitive_type(rawitem) else convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) # we can not use convert_dict, because rawitem could be non-dict + if item.get("@flat", False): return subtree + attrstring = make_attrstring(attr) + return f"<{item_name}{attrstring}>{subtree}" + +def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): + if attr_type: + attr["type"] = get_xml_type(item) + key_name = item_func(item_name) + if item_name.endswith('@flat'): item_name = item_name[0:-5] + subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap) + if key_name.endswith('@flat'): return subtree + if len(item)>0 and is_primitive_type(item[0]) and not item_wrap: return subtree + attrstring = make_attrstring(attr) + return f"<{item_name}{attrstring}>{subtree}" def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a dict into an XML string.""" - LOG.info( - f'Inside convert_dict(): obj type is: "{type(obj).__name__}", obj="{str(obj)}"' - ) + keys_str = ', '.join(key for key in obj) + LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') + output = [] addline = output.append for key, val in obj.items(): - LOG.info( - f'Looping inside convert_dict(): key="{str(key)}", val="{str(val)}", type(val)="{type(val).__name__}"' - ) + LOG.info(f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' val="{str(val)}"') attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} @@ -207,31 +240,11 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): addline(convert_bool(key, val, attr_type, attr, cdata)) elif isinstance(val, dict): - if attr_type: - attr["type"] = get_xml_type(val) - dict_str = convert_dict( - val, ids, key, attr_type, item_func, cdata, item_wrap - ) - attrstring = make_attrstring(attr) - addline(f"<{key}{attrstring}>{dict_str}") - - elif isinstance(val, collections.abc.Iterable) and val: - if attr_type: - attr["type"] = get_xml_type(val) - if ( - isinstance(val[0], numbers.Number) - or isinstance(val[0], str) - and not item_wrap - ): - addline( - convert_list(val, ids, key, attr_type, item_func, cdata, item_wrap) - ) - else: - attrstring = make_attrstring(attr) - list_str = convert_list( - val, ids, key, attr_type, item_func, cdata, item_wrap - ) - addline(f"<{key}{attrstring}>{list_str}") + addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) + + elif isinstance(val, collections.abc.Iterable): + addline(list2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) + elif not val: addline(convert_none(key, val, attr_type, attr, cdata)) @@ -243,19 +256,24 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a list into an XML string.""" - LOG.info("Inside convert_list()") + LOG.info(f'Inside convert_list(): type(items)="{type(items).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' items="{str(items)}"') + output = [] addline = output.append item_name = item_func(parent) + if item_name.endswith('@flat'): item_name = item_name[:-5] this_id = None if ids: this_id = get_unique_id(parent) for i, item in enumerate(items): - LOG.info( - f'Looping inside convert_list(): item="{str(item)}", item_name="{item_name}", type="{type(item).__name__}"' - ) + LOG.info(f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"') + # avoid cpu consuming object serialization => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') + attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} if isinstance(item, (numbers.Number, str)): if item_wrap: @@ -294,37 +312,10 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): addline(convert_bool(item_name, item, attr_type, attr, cdata)) elif isinstance(item, dict): - item_dict_str = convert_dict( - item, - ids, - parent, - attr_type, - item_func, - cdata, - item_wrap, - ) - if not attr_type: - if item_wrap: - addline(f"<{item_name}>{item_dict_str}") - else: - addline(f"{item_dict_str}") - else: - if item_wrap: - addline(f'<{item_name} type="dict">{item_dict_str}') - else: - addline(f"{item_dict_str}") + addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) elif isinstance(item, collections.abc.Iterable): - attrstring = make_attrstring(attr) - convert_list_str = convert_list( - item, ids, item_name, attr_type, item_func, cdata, item_wrap - ) - if not attr_type: - addline(f"<{item_name} {attrstring}>{convert_list_str}") - else: - addline( - f'<{item_name} type="list"{attrstring}>{convert_list_str}' - ) + addline(list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) elif item is None: addline(convert_none(item_name, None, attr_type, attr, cdata)) @@ -383,6 +374,7 @@ def dicttoxml( item_wrap=True, item_func=default_item_func, cdata=False, + xml_namespaces={} ): """Converts a python object into XML. Arguments: @@ -401,17 +393,32 @@ def dicttoxml( Default is True - cdata specifies whether string values should be wrapped in CDATA sections. Default is False + - xml_namespaces is a dictionary where key is xmlns prefix and value the urn, + e.g. { 'flex': 'http://www.w3.org/flex/flexBase', 'xsl': "http://www.w3.org/1999/XSL/Transform"} + will result in ... + Default is {} + + Dictionaries-keys with special char '@' has special meaning: + @attrs: This allows custom xml attributes. Sample {'@attr':{'a':'b'}, 'x':'y'} results in y + @flat: If a key ends with @flat (or dict contains key '@flat'), encapsulating node is omitted. Similar to item_wrap parameter for lists. + @val: @attrs required compelex dict type. If primitive type should be used, then @val is used as key. Sample {'@attr':{'a':'b'}, '@val':'y'} results in y + Esp. if item['x'] is primitive type, you can set: item['x'] = {'@val': item['x'], '@attrs':{'a':'b'}} """ - LOG.info( - f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}", obj="{str(obj)}"' - ) + LOG.info(f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}"') + # avoid cpu consuming object serialization (problem for large objects) => extra if + if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') + output = [] + namespacestr = '' + for prefix in xml_namespaces: + ns = xml_namespaces[prefix] + namespacestr += f' xmlns:{prefix}="{ns}"' if root: output.append('') output_elem = convert( obj, ids, attr_type, item_func, cdata, item_wrap, parent=custom_root ) - output.append(f"<{custom_root}>{output_elem}") + output.append(f"<{custom_root}{namespacestr}>{output_elem}") else: output.append( convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="") From bb518c3f8da7e118c72fbee1513bc01752c9793f Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Tue, 19 Apr 2022 10:17:28 +0200 Subject: [PATCH 2/7] fix item_wrap + merge handling of bool --- json2xml/dicttoxml.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 6382edad..e90b766b 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -142,6 +142,13 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): item_name = item_func(parent) + # since bool is also a subtype of number.Number and int, the check for bool + # never comes and hence we get wrong value for the xml type bool + # here, we just change order and check for bool first, because no other + # type other than bool can be true for bool check + if isinstance(obj, bool): + return convert_bool(item_name, obj, attr_type, cdata) + if isinstance(obj, (numbers.Number, str)): return convert_kv( key=item_name, val=obj, attr_type=attr_type, attr={}, cdata=cdata @@ -156,9 +163,6 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): cdata=cdata, ) - if isinstance(obj, bool): - return convert_bool(item_name, obj, attr_type, cdata) - if obj is None: return convert_none(item_name, "", attr_type, cdata) @@ -186,6 +190,7 @@ def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): rawitem = item["@val"] if "@val" in item else item subtree = rawitem if is_primitive_type(rawitem) else convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) # we can not use convert_dict, because rawitem could be non-dict if item.get("@flat", False): return subtree + if not item_wrap: return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" @@ -218,7 +223,14 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): key, attr = make_valid_xml_name(key, attr) - if isinstance(val, (numbers.Number, str)): + # since bool is also a subtype of number.Number and int, the check for bool + # never comes and hence we get wrong value for the xml type bool + # here, we just change order and check for bool first, because no other + # type other than bool can be true for bool check + if isinstance(val, bool): + addline(convert_bool(key, val, attr_type, attr, cdata)) + + elif isinstance(val, (numbers.Number, str)): addline( convert_kv( key=key, val=val, attr_type=attr_type, attr=attr, cdata=cdata @@ -236,9 +248,6 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): ) ) - elif isinstance(val, bool): - addline(convert_bool(key, val, attr_type, attr, cdata)) - elif isinstance(val, dict): addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) From 5a75248e56867ad8e053ea10736edb1748079bea Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Tue, 19 Apr 2022 11:02:20 +0200 Subject: [PATCH 3/7] fix item_wrap --- json2xml/dicttoxml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index e90b766b..e57708b7 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -178,7 +178,7 @@ def is_primitive_type(val): t = get_xml_type(val) return t in {'str', 'int', 'float', 'bool', 'number', 'null'} -def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): +def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList): keys_str = ', '.join(key for key in item) LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') # avoid cpu consuming object serialization => extra if @@ -190,7 +190,7 @@ def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): rawitem = item["@val"] if "@val" in item else item subtree = rawitem if is_primitive_type(rawitem) else convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) # we can not use convert_dict, because rawitem could be non-dict if item.get("@flat", False): return subtree - if not item_wrap: return subtree + if parentIsList and not item_wrap: return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" @@ -249,7 +249,7 @@ def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): ) elif isinstance(val, dict): - addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) + addline(dict2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap, False)) elif isinstance(val, collections.abc.Iterable): addline(list2xml_str(attr_type, attr, val, item_func, cdata, key, item_wrap)) @@ -321,7 +321,7 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): addline(convert_bool(item_name, item, attr_type, attr, cdata)) elif isinstance(item, dict): - addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) + addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, True)) elif isinstance(item, collections.abc.Iterable): addline(list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap)) From 9d5958ffd303afa82c950195d8d9957aa09ca2a9 Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Wed, 20 Apr 2022 10:29:48 +0200 Subject: [PATCH 4/7] merge boolean fix + new tests --- examples/booleanjson.json | 8 ++++++++ examples/booleanjson2.json | 5 +++++ json2xml/dicttoxml.py | 9 +++++---- tests/test_json2xml.py | 19 +++++++++++++++++++ 4 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 examples/booleanjson.json create mode 100644 examples/booleanjson2.json diff --git a/examples/booleanjson.json b/examples/booleanjson.json new file mode 100644 index 00000000..a784c7b7 --- /dev/null +++ b/examples/booleanjson.json @@ -0,0 +1,8 @@ +{ + "boolean": true, + "boolean_dict_list": [ + {"boolean_dict": {"boolean": true}}, + {"boolean_dict": {"boolean": false}} + ], + "boolean_list": [true, false] +} diff --git a/examples/booleanjson2.json b/examples/booleanjson2.json new file mode 100644 index 00000000..95a35a8e --- /dev/null +++ b/examples/booleanjson2.json @@ -0,0 +1,5 @@ +{ + "boolean_list": [true, false], + "number_array": [1, 2, 3], + "string_array": ["a", "b", "c"] +} \ No newline at end of file diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index e57708b7..047fb06f 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -284,7 +284,11 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} - if isinstance(item, (numbers.Number, str)): + + if isinstance(item, bool): + addline(convert_bool(item_name, item, attr_type, attr, cdata)) + + elif isinstance(item, (numbers.Number, str)): if item_wrap: addline( convert_kv( @@ -317,9 +321,6 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): ) ) - elif isinstance(item, bool): - addline(convert_bool(item_name, item, attr_type, attr, cdata)) - elif isinstance(item, dict): addline(dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, True)) diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index bbf7ae49..e4acd7a6 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -176,3 +176,22 @@ def test_bad_data(self): with pytest.raises(InvalidDataError) as pytest_wrapped_e: json2xml.Json2xml(decoded).to_xml() assert pytest_wrapped_e.type == InvalidDataError + + def test_read_boolean_data_from_json(self): + """Test correct return for boolean types.""" + data = readfromjson("examples/booleanjson.json") + result = json2xml.Json2xml(data).to_xml() + dict_from_xml = xmltodict.parse(result) + assert dict_from_xml["all"]["boolean"]["#text"] != 'True' + assert dict_from_xml["all"]["boolean"]["#text"] == 'true' + + + def test_read_boolean_data_from_json2(self): + """Test correct return for boolean types.""" + data = readfromjson("examples/booleanjson2.json") + result = json2xml.Json2xml(data).to_xml() + print(result) + dict_from_xml = xmltodict.parse(result) + print(dict_from_xml) + assert dict_from_xml["all"]["item"][0]["#text"] != 'True' + assert dict_from_xml["all"]["item"][0]["#text"] == 'true' From 0a45e9eee298712aba65b93b18b29e35f127bb9b Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Wed, 20 Apr 2022 12:02:50 +0200 Subject: [PATCH 5/7] some more fixes to support non-string dict-keys + fixes with custom item_func + some new tests added --- json2xml/dicttoxml.py | 14 ++++++++------ tests/test_json2xml.py | 38 +++++++++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 9 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index 047fb06f..6720380a 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -106,7 +106,7 @@ def make_valid_xml_name(key, attr: Dict[str, Any]): return key, attr # prepend a lowercase n if the key is numeric - if key.isdigit(): + if isinstance(key, int) or key.isdigit(): return f"n{key}", attr # replace spaces with underscores if that fixes the problem @@ -179,7 +179,7 @@ def is_primitive_type(val): return t in {'str', 'int', 'float', 'bool', 'number', 'null'} def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList): - keys_str = ', '.join(key for key in item) + keys_str = ', '.join(str(key) for key in item) LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') @@ -197,17 +197,19 @@ def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): if attr_type: attr["type"] = get_xml_type(item) - key_name = item_func(item_name) - if item_name.endswith('@flat'): item_name = item_name[0:-5] + flat = False + if item_name.endswith('@flat'): + item_name = item_name[0:-5] + flat = True subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap) - if key_name.endswith('@flat'): return subtree + if flat: return subtree if len(item)>0 and is_primitive_type(item[0]) and not item_wrap: return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a dict into an XML string.""" - keys_str = ', '.join(key for key in obj) + keys_str = ', '.join(str(key) for key in obj) LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"') # avoid cpu consuming object serialization => extra if if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index e4acd7a6..c830dc57 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -181,10 +181,15 @@ def test_read_boolean_data_from_json(self): """Test correct return for boolean types.""" data = readfromjson("examples/booleanjson.json") result = json2xml.Json2xml(data).to_xml() + print(result) dict_from_xml = xmltodict.parse(result) + print(dict_from_xml) assert dict_from_xml["all"]["boolean"]["#text"] != 'True' assert dict_from_xml["all"]["boolean"]["#text"] == 'true' - + assert dict_from_xml["all"]["boolean_dict_list"]["item"][0]["boolean_dict"]["boolean"]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_dict_list"]["item"][1]["boolean_dict"]["boolean"]["#text"] == 'false' + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false' def test_read_boolean_data_from_json2(self): """Test correct return for boolean types.""" @@ -193,5 +198,32 @@ def test_read_boolean_data_from_json2(self): print(result) dict_from_xml = xmltodict.parse(result) print(dict_from_xml) - assert dict_from_xml["all"]["item"][0]["#text"] != 'True' - assert dict_from_xml["all"]["item"][0]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] != 'True' + assert dict_from_xml["all"]["boolean_list"]["item"][0]["#text"] == 'true' + assert dict_from_xml["all"]["boolean_list"]["item"][1]["#text"] == 'false' + assert dict_from_xml["all"]["number_array"]["item"][0]["#text"] == '1' + assert dict_from_xml["all"]["number_array"]["item"][1]["#text"] == '2' + assert dict_from_xml["all"]["number_array"]["item"][2]["#text"] == '3' + assert dict_from_xml["all"]["string_array"]["item"][0]["#text"] == 'a' + assert dict_from_xml["all"]["string_array"]["item"][1]["#text"] == 'b' + assert dict_from_xml["all"]["string_array"]["item"][2]["#text"] == 'c' + + def test_dict2xml_with_namespaces(self): + data = {'ns1:node1': 'data in namespace 1', 'ns2:node2': 'data in namespace 2'} + namespaces = {'ns1': 'http://www.google.de/ns1', 'ns2': 'http://www.google.de/ns2'} + result = dicttoxml(data, attr_type=False, xml_namespaces=namespaces) + print(result) + assert b'data in namespace 1data in namespace 2' == result + + def test_dict2xml_with_flat(self): + data = {'flat_list@flat': [1,2,3], 'non_flat_list': [4,5,6]} + result = dicttoxml(data, attr_type=False) + print(result) + assert b'123456' == result + + def test_dict2xml_with_val_and_custom_attr(self): + # in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key + data = {'list1': [1,2,3], 'list2': {'@attrs':{'myattr1':'myval1','myattr2':'myval2'}, '@val':[4,5,6]}} + result = dicttoxml(data, attr_type=False) + print(result) + assert b'123456' == result From cf7b0ddd8ad26d57dc0cde12aa26b684f8683917 Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Wed, 20 Apr 2022 14:12:25 +0200 Subject: [PATCH 6/7] improve code style for flake8 --- json2xml/dicttoxml.py | 54 ++++++++++++++++++++++++++++-------------- tests/test_json2xml.py | 20 ++++++++++++---- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py index b5711748..8c57a573 100755 --- a/json2xml/dicttoxml.py +++ b/json2xml/dicttoxml.py @@ -138,7 +138,8 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): based on their data type""" LOG.info(f'Inside convert(). type(obj)="{type(obj).__name__}"') # avoid cpu consuming object serialization => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') item_name = item_func(parent) # since bool is also a subtype of number.Number and int, the check for bool @@ -173,26 +174,34 @@ def convert(obj, ids, attr_type, item_func, cdata, item_wrap, parent="root"): raise TypeError(f"Unsupported data type: {obj} ({type(obj).__name__})") + def is_primitive_type(val): t = get_xml_type(val) return t in {'str', 'int', 'float', 'bool', 'number', 'null'} + def dict2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap, parentIsList): keys_str = ', '.join(str(key) for key in item) LOG.info(f'Inside dict_item2xml_str: type(obj)="{type(item).__name__}", keys="{keys_str}"') # avoid cpu consuming object serialization => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' item="{str(item)}"') if attr_type: attr["type"] = get_xml_type(item) attr = item.pop("@attrs", attr) # update attr with custom @attr if exists rawitem = item["@val"] if "@val" in item else item - subtree = rawitem if is_primitive_type(rawitem) else convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) # we can not use convert_dict, because rawitem could be non-dict - if item.get("@flat", False): return subtree - if parentIsList and not item_wrap: return subtree + if is_primitive_type(rawitem): + subtree = rawitem + else: + # we can not use convert_dict, because rawitem could be non-dict + subtree = convert(rawitem, ids, attr_type, item_func, cdata, item_wrap, item_name) + if item.get("@flat", False) or (parentIsList and not item_wrap): + return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" + def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): if attr_type: attr["type"] = get_xml_type(item) @@ -201,24 +210,27 @@ def list2xml_str(attr_type, attr, item, item_func, cdata, item_name, item_wrap): item_name = item_name[0:-5] flat = True subtree = convert_list(item, ids, item_name, attr_type, item_func, cdata, item_wrap) - if flat: return subtree - if len(item)>0 and is_primitive_type(item[0]) and not item_wrap: return subtree + if flat or (len(item) > 0 and is_primitive_type(item[0]) and not item_wrap): + return subtree attrstring = make_attrstring(attr) return f"<{item_name}{attrstring}>{subtree}" + def convert_dict(obj, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a dict into an XML string.""" keys_str = ', '.join(str(key) for key in obj) LOG.info(f'Inside convert_dict(): type(obj)="{type(obj).__name__}", keys="{keys_str}"') # avoid cpu consuming object serialization => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') output = [] addline = output.append for key, val in obj.items(): LOG.info(f'Looping inside convert_dict(): key="{str(key)}", type(val)="{type(val).__name__}"') - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' val="{str(val)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' val="{str(val)}"') attr = {} if not ids else {"id": f"{get_unique_id(parent)}"} @@ -268,13 +280,15 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): """Converts a list into an XML string.""" LOG.info(f'Inside convert_list(): type(items)="{type(items).__name__}"') # avoid cpu consuming object serialization => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' items="{str(items)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' items="{str(items)}"') output = [] addline = output.append item_name = item_func(parent) - if item_name.endswith('@flat'): item_name = item_name[:-5] + if item_name.endswith('@flat'): + item_name = item_name[:-5] this_id = None if ids: this_id = get_unique_id(parent) @@ -282,7 +296,8 @@ def convert_list(items, ids, parent, attr_type, item_func, cdata, item_wrap): for i, item in enumerate(items): LOG.info(f'Looping inside convert_list(): index="{str(i)}", type="{type(item).__name__}"') # avoid cpu consuming object serialization => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' item="{str(item)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' item="{str(item)}"') attr = {} if not ids else {"id": f"{this_id}_{i + 1}"} @@ -406,18 +421,21 @@ def dicttoxml( Default is False - xml_namespaces is a dictionary where key is xmlns prefix and value the urn, e.g. { 'flex': 'http://www.w3.org/flex/flexBase', 'xsl': "http://www.w3.org/1999/XSL/Transform"} - will result in ... + results in Default is {} Dictionaries-keys with special char '@' has special meaning: - @attrs: This allows custom xml attributes. Sample {'@attr':{'a':'b'}, 'x':'y'} results in y - @flat: If a key ends with @flat (or dict contains key '@flat'), encapsulating node is omitted. Similar to item_wrap parameter for lists. - @val: @attrs required compelex dict type. If primitive type should be used, then @val is used as key. Sample {'@attr':{'a':'b'}, '@val':'y'} results in y - Esp. if item['x'] is primitive type, you can set: item['x'] = {'@val': item['x'], '@attrs':{'a':'b'}} + @attrs: This allows custom xml attributes: {'@attr':{'a':'b'}, 'x':'y'} results in y + @flat: If a key ends with @flat (or dict contains key '@flat'), encapsulating node is omitted. Similar to item_wrap. + @val: @attrs requires complex dict type. If primitive type should be used, then @val is used as key. + To add custom xml-attributes on a list {'list': [4, 5, 6]}, you do this: + {'list': {'@attrs': {'a':'b','c':'d'}, '@val': [4, 5, 6]} + which results in 456 """ LOG.info(f'Inside dicttoxml(): type(obj) is: "{type(obj).__name__}"') # avoid cpu consuming object serialization (problem for large objects) => extra if - if LOG.getEffectiveLevel() <= logging.DEBUG: LOG.debug(f' obj="{str(obj)}"') + if LOG.getEffectiveLevel() <= logging.DEBUG: + LOG.debug(f' obj="{str(obj)}"') output = [] namespacestr = '' diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index c830dc57..fdb5be9d 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -213,17 +213,27 @@ def test_dict2xml_with_namespaces(self): namespaces = {'ns1': 'http://www.google.de/ns1', 'ns2': 'http://www.google.de/ns2'} result = dicttoxml(data, attr_type=False, xml_namespaces=namespaces) print(result) - assert b'data in namespace 1data in namespace 2' == result + assert b'' \ + b'' \ + b'data in namespace 1' \ + b'data in namespace 2' \ + b'' == result def test_dict2xml_with_flat(self): - data = {'flat_list@flat': [1,2,3], 'non_flat_list': [4,5,6]} + data = {'flat_list@flat': [1, 2, 3], 'non_flat_list': [4, 5, 6]} result = dicttoxml(data, attr_type=False) print(result) - assert b'123456' == result + assert b'' \ + b'123' \ + b'456' \ + b'' == result def test_dict2xml_with_val_and_custom_attr(self): # in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key - data = {'list1': [1,2,3], 'list2': {'@attrs':{'myattr1':'myval1','myattr2':'myval2'}, '@val':[4,5,6]}} + data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1':'myval1','myattr2':'myval2'}, '@val': [4, 5, 6]}} result = dicttoxml(data, attr_type=False) print(result) - assert b'123456' == result + assert b'' \ + b'123' \ + b'456' \ + b'' == result From fa15a8ec4601209124c62096c75f7320e609e14d Mon Sep 17 00:00:00 2001 From: "Dr. Dirk Richter" Date: Wed, 20 Apr 2022 14:14:58 +0200 Subject: [PATCH 7/7] improve code style for flake8 --- tests/test_json2xml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index fdb5be9d..b85f1d13 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -230,7 +230,7 @@ def test_dict2xml_with_flat(self): def test_dict2xml_with_val_and_custom_attr(self): # in order to use @attr in non-dict objects, we need to lift into a dict and combine with @val as key - data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1':'myval1','myattr2':'myval2'}, '@val': [4, 5, 6]}} + data = {'list1': [1, 2, 3], 'list2': {'@attrs': {'myattr1': 'myval1', 'myattr2': 'myval2'}, '@val': [4, 5, 6]}} result = dicttoxml(data, attr_type=False) print(result) assert b'' \