Entering edit mode
9.4 years ago
prasanna55kt
•
0
I want to parse xml file to csv by python. I am following xml->json and json to csv flow.
I am not able to convert tag having same name into csv. My code taking only once skipping remaining tags having same name. Please help on the same below. My code is not taking tag name having identical name can anyone help me to resolve this issues
Code:
import xml.etree.ElementTree as et
import json
import csv
import sys
import codecs
import os
class xml2json:
def __init__(self, input_file, output_file = None, encoding='utf-8'):
"""Initialize the class with the paths to the input xml file
and the output json file
"""
# open the xml file for iteration
self.context = et.iterparse(input_file, events=("start", "end"))
self.output_file = output_file
self.encoding = encoding
def get_json(self, pretty=True):
"""
Convert an XML file to json string
"""
self.context = iter(self.context)
event, root = self.context.next()
return self._elem2json(root, pretty)
def convert(self, pretty=True):
"""
Convert xml file to a json file
"""
json = self.get_json(pretty)
# output file handle
try:
output = codecs.open('json_temp.json', "w", encoding=self.encoding)
except:
print("Failed to open the output file")
raise
output.write(json)
output.close()
def _elem2list(self, elem):
"""Convert an ElementTree element to a list"""
block = {}
# get the element's children
children = elem.getchildren()
if children:
cur = map(self._elem2list, children)
# create meaningful lists
scalar = False
try:
if elem[0].tag != elem[1].tag: # [{a: 1}, {b: 2}, {c: 3}] => {a: 1, b: 2, c: 3}
cur = dict(zip(
map(lambda e: e.keys()[0], cur),
map(lambda e: e.values()[0], cur)
))
else:
scalar = True
except Exception as e: # [{a: 1}, {a: 2}, {a: 3}] => {a: [1, 2, 3]}
scalar = True
if scalar:
if len(cur) > 0:
cur = {elem[0].tag: [e.values()[0] for e in cur if e.values()[0] is not None]}
else:
cur = {elem[0].tag: cur[0].values()[0] }
block[elem.tag] = cur
else:
val = None
if elem.text:
val = elem.text.strip()
val = val if len(val) > 0 else None
elif elem.attrib:
val = elem.attrib
val = val if len(val) > 0 else None
block[elem.tag] = val
return block
def _elem2json(self, elem, pretty=True):
"""
Convert an ElementTree Element (root) to json
"""
# if the given Element is not the root element, find it
if hasattr(elem, 'getroot'):
elem = elem.getroot()
return json.dumps(self._elem2list(elem), indent=(4 if pretty else None))
#ni=open('json_temp.json','wb')
converter = xml2json(sys.argv[1], encoding="utf-8")
converter.convert()
def change(row, pastkeys=()):
result = {}
for key in row:
newkey = pastkeys + (key,)
val = row[key]
if isinstance(val, dict):
result.update(change(val, newkey))
elif isinstance(val, list):
result.update(change(dict(zip(range(0, len(val)), val)), newkey))
else:
result[newkey] = val
return result
# Get the JSON object, ensuring that we have a list of objects
##lines = list(sys.argv[2])
a=open('json_temp.json','r')
lines=list(a)
b= open(sys.argv[2],'w')
try:
data = json.loads(''.join(lines))
if isinstance(data, dict):
data = [data]
except ValueError:
data = [json.loads(line) for line in lines]
# change into keys
result = []
fields = set()
for row in data:
hash = change(row)
fields |= set(hash.keys())
result.append(hash)
# Write as CSV
fields = sorted(fields)
out = csv.writer(b, lineterminator='\n')
out.writerow(['-'.join([str(f) for f in field]) for field in fields])
for row in result:
out.writerow([(row.get(field, ''))for field in fields])
a.close()
os.remove('json_temp.json')