# 10/11/2008
#
# Python: getInfoFromXmlString
#
# 0 comments



from xml.dom import minidom

def getInfoFromXmlString(xmltxt):
    """Parse an XML snippet and describe its root element.

    The string is parsed with ``xml.dom.minidom`` and the document's root
    element is inspected.

    Args:
        xmltxt: XML source text, e.g.
            ``'<date1 type="datetime" start="now">2005-08-11T13:43</date1>'``.

    Returns:
        A 3-tuple ``(tag_name, text, attrs)``:

        * ``tag_name`` -- the root element's tag name.
        * ``text`` -- the concatenation of the text and CDATA nodes that are
          direct children of the root (``''`` when there are none). Nested
          elements and comments do not contribute.
        * ``attrs`` -- a plain ``dict`` mapping attribute names to values.

        On Python 2 the parsed strings are ``unicode`` objects.

    Raises:
        xml.parsers.expat.ExpatError: if ``xmltxt`` is not well-formed XML.

    Examples (written as comparisons so they hold on Python 2 and 3):

    >>> xmltxt = '<date1 type="datetime" start="now">2005-08-11T13:43</date1>'
    >>> expected = ('date1', '2005-08-11T13:43',
    ...             {'start': 'now', 'type': 'datetime'})
    >>> getInfoFromXmlString(xmltxt) == expected
    True

    >>> getInfoFromXmlString('<zoomified/>') == ('zoomified', '', {})
    True
    """
    document = minidom.parseString(xmltxt)

    # documentElement is the root element even when a comment, processing
    # instruction or DOCTYPE comes first; childNodes[0] would return that
    # leading node instead, which has no attributes.
    root = document.documentElement

    # Only text and CDATA children carry the element's text. Child elements
    # have nodeValue None (which would break the join) and comment nodes
    # hold comment text that is not part of the element's value.
    text_types = (root.TEXT_NODE, root.CDATA_SECTION_NODE)
    text = ''.join(
        child.data for child in root.childNodes
        if child.nodeType in text_types
    )

    # NamedNodeMap.items() yields (name, value) pairs, e.g.
    # [('start', 'now'), ('type', 'datetime')].
    attrs = dict(root.attributes.items())

    return root.tagName, text, attrs

# Manual smoke test for getInfoFromXmlString, kept inside a bare string
# literal so it is never executed. The calls use the Python 2 ``print``
# statement and the expected results are shown as Python 2 reprs.
''' getInfoFromXmlString test:

xmltxt ='<date1 type="datetime" start="now">2005-08-11T13:43</date1>'
print getInfoFromXmlString(xmltxt)
# (u'date1', u'2005-08-11T13:43', {u'start': u'now', u'type': u'datetime'})
print getInfoFromXmlString('<zoomified/>')
# (u'zoomified', '', {})

'''