from xml.dom import minidom
# Load the document and gather every <item> element it contains.
dom = minidom.parse('items.xml')
elements = dom.getElementsByTagName('item')

# Report how many items were found, then list the 'name' attribute of each.
print(f"There are {len(elements)} items:")
for item_node in elements:
    name_attribute = item_node.attributes['name']
    print(name_attribute.value)
# Bind ``ET`` to the first ElementTree implementation that can be imported,
# trying the historically fastest ones first.
try:
    # Stand-alone cElementTree distribution.
    import cElementTree as ET
except ImportError:
    try:
        # C accelerator shipped inside the standard library's xml.etree package.
        import xml.etree.cElementTree as ET
    except ImportError:
        try:
            # Newer Python versions no longer ship xml.etree.cElementTree;
            # fall back to the plain ElementTree module, which has the same API.
            import xml.etree.ElementTree as ET
        except ImportError:
            exit_err("Failed to import cElementTree from any known place")
def find_in_tree(tree, node):
    """Return the first match for ``node`` below ``tree``.

    Args:
        tree: Object exposing ``find`` (an ElementTree or Element).
        node: Tag name or path expression handed to ``tree.find``.

    Returns:
        The value returned by ``tree.find(node)``. When that is ``None``,
        a notice is printed and an empty list is returned instead, so
        callers can iterate over the result unconditionally.
    """
    found = tree.find(node)
    # Identity check on purpose: only a missing node (None) counts as
    # "not found", regardless of how the returned element compares.
    if found is None:
        print("No %s in file" % node)
        found = []
    return found
# Parse an XML file (specify the path).
def_file = "xml_file_name.xml"
try:
    # Pass the path itself rather than an open() handle: the parser then
    # opens and closes the file on its own, so no file object is leaked.
    dom = ET.parse(def_file)
    root = dom.getroot()
except (OSError, ET.ParseError):
    # Only "cannot read" and "not well-formed" are reported here; anything
    # else (e.g. KeyboardInterrupt) is allowed to propagate.
    exit_err("Unable to open and parse input definition file: " + def_file)

# Look up node 'myNode' below the root; iterating the result walks its child
# nodes (find_in_tree yields an empty list when the node is missing).
fwdefs = find_in_tree(root, "myNode")
# library                         time      space
# xml.dom.minidom (Python 2.1)    6.3 s     80000K
# gnosis.objectify                2.0 s     22000k
# xml.dom.minidom (Python 2.4)    1.4 s     53000k
# ElementTree 1.2                 1.6 s     14500k
# ElementTree 1.2.4/1.3           1.1 s     14500k
# cDomlette (C extension)         0.540 s   20500k
# PyRXPU (C extension)            0.175 s   10850k
# libxml2 (C extension)           0.098 s   16000k
# readlines (read as utf-8)       0.093 s   8850k
# cElementTree (C extension)  --> 0.047 s   4900K <--
# readlines (read as ascii)       0.032 s   5050k
import xml.etree.cElementTree as ET
# Parse the file and report the tag of its root element.
tree = ET.parse("foo.xml")
root = tree.getroot()
root_tag = root.tag
print(root_tag)

# For every <type> element under <bar>, print each of its attribute values.
for type_element in root.findall("./bar/type"):
    for attribute_value in type_element.attrib.values():
        print(attribute_value)
from benedict import benedict as bdict
# The data source may be a URL, a file path or, as in this example, the XML
# text itself.
data_source = """<foo><bar><type foobar="1"/><type foobar="2"/></bar></foo>"""

data = bdict.from_xml(data_source)

# Dotted keypaths are supported, so 'foo.bar' reaches the nested <bar> node.
t_list = data['foo.bar']

# NOTE(review): assumes iterating the 'foo.bar' value yields one mapping per
# <type> element, with attributes stored under '@'-prefixed keys -- confirm
# against benedict's XML conversion.
for type_entry in t_list:
    print(type_entry['@foobar'])
# Use this variant when the XML is available as a (byte) string.
from lxml import etree, objectify

# Sample XML held in a byte string; its elements live in the namespace
# {http://xmlns.abc.com}.
message = b'<?xml version="1.0" encoding="UTF-8"?>\r\n<pa:Process xmlns:pa="http://xmlns.abc.com">\r\n\t<pa:firsttag>SAMPLE</pa:firsttag></pa:Process>\r\n'

print('************message coversion and parsing starts*************')

message = message.decode('utf-8')
# Drop the XML declaration line: once the bytes are decoded to text, the
# encoding it announces no longer applies to the string being parsed.
message = message.replace('<?xml version="1.0" encoding="UTF-8"?>\r\n', '')
# Trim the line break that follows the closing root tag.
message = message.replace('pa:Process>\r\n', 'pa:Process>')
print(message)

print('******Parsing starts*************')
# remove_blank_text discards whitespace-only text between elements; it does
# NOT strip namespaces -- tags keep their '{namespace}' prefix (see below).
parser = etree.XMLParser(remove_blank_text=True)
root = etree.fromstring(message, parser)  # the actual parsing happens here
print('******Parsing completed************')

# Collected values. Deliberately not named 'dict', which would shadow the
# builtin type for the rest of the module.
extracted = {}
for child in root:
    print(child.tag, child.text)
    print('****Derving from xml tree*****')
    # Tags are namespace-qualified, so compare against '{namespace}localname'.
    if child.tag == "{http://xmlns.abc.com}firsttag":
        extracted["FIRST_TAG"] = child.text
        print(extracted)
### output
'''
************message coversion and parsing starts*************
<pa:Process xmlns:pa="http://xmlns.abc.com">
    <pa:firsttag>SAMPLE</pa:firsttag></pa:Process>
******Parsing starts*************
******Parsing completed************
{http://xmlns.abc.com}firsttag SAMPLE
****Derving from xml tree*****
{'FIRST_TAG': 'SAMPLE'}
'''
from lxml import etree, objectify

# Path of the sample XML file to parse.
metadata = 'C:\\Users\\PROCS.xml'

# remove_blank_text discards whitespace-only text between elements; the
# namespaces are stripped separately by the loop below.
parser = etree.XMLParser(remove_blank_text=True)
tree = etree.parse(metadata, parser)  # parse the XML file
root = tree.getroot()                 # root element of the document

# Strip the '{namespace}' prefix from every tag so that elements can be
# matched by their local name alone.
for elem in root.iter():  # iter() replaces the deprecated getiterator()
    # Skip nodes whose tag is not string-like (it has no 'find' method).
    if not hasattr(elem.tag, 'find'):
        continue
    _, closing_brace, local_name = elem.tag.partition('}')
    if closing_brace:
        elem.tag = local_name

# Collected values. Deliberately not named 'dict', which would shadow the
# builtin type for the rest of the module.
extracted = {}
for elem in tree.iter():
    # Namespaces were stripped above, so the bare tag name is enough here.
    if elem.tag == "firsttag":
        extracted["FIRST_TAG"] = str(elem.text)
        print(extracted)
import re
# One element: either <name attrs>value</name> or the self-closing <name attrs/>.
# Groups: var = tag name, attr = raw attribute text, val = inner content.
_ELEMENT_RE = re.compile(
    r"<(?P<var>\S*)(?P<attr>[^/>]*)(?:(?:>(?P<val>.*?)</(?P=var)>)|(?:/>))"
)

# One attribute: name="value", name='value', name=value, or a bare name.
# findall() tuple layout: (avr, quote, avl, avl1, avl2).
_ATTRIBUTE_RE = re.compile(
    r"""(?P<avr>\S+?)(?:(?:=(?P<quote>['"])(?P<avl>.*?)(?P=quote))|(?:=(?P<avl1>.*?)(?:\s|$))|(?P<avl2>[\s]+)|$)"""
)


def _element_to_dict(name, raw_attributes, inner):
    """Build the ``{name: [attributes, values]}`` mapping for one element."""
    attributes = [
        # Index 2 = quoted value, 3 = unquoted value, 4 = whitespace that
        # follows a bare attribute name.
        {found[0]: (found[2] or found[3] or found[4])}
        for found in _ATTRIBUTE_RE.findall(raw_attributes.strip())
    ]
    return {name: [{"@attributes": attributes}, {"$values": getdict(inner)}]}


def getdict(content):
    """Convert XML-like text into nested dicts/lists using regular expressions.

    Args:
        content: The text to convert. ``.`` in the patterns does not match
            newlines, so an element must not span several lines.

    Returns:
        * ``content`` unchanged when it contains no element;
        * one ``{tag: [{"@attributes": [...]}, {"$values": ...}]}`` dict when
          exactly one element is found at this level;
        * a list of such dicts when several elements are found.
        ``"$values"`` holds the result of converting the element's inner text
        recursively.
    """
    elements = _ELEMENT_RE.findall(content)
    if not elements:
        return content
    converted = [_element_to_dict(*element) for element in elements]
    # A lone element is returned bare rather than wrapped in a list. Each
    # match is unpacked as a (name, attributes, inner) tuple; indexing the
    # match list itself would fail when there is only one match.
    return converted[0] if len(converted) == 1 else converted
# Read the whole file and join it into a single line first: the patterns used
# by getdict cannot match across line breaks.
with open("test.xml", "r") as xml_file:
    flattened_xml = xml_file.read().replace('\n', '')
    print(getdict(flattened_xml))